Skip to content

Commit

Permalink
Model export update. (mlflow#36)
Browse files Browse the repository at this point in the history
1. MLflow is installed from pip inside the container, unless MLFLOW_DEV environment variable is set. This applies to both Azure ML and SageMaker containers.
2. The SageMaker init script is translated to Python. 
3. Refactored mlflow sagemaker code. 
   - The Python API moved to sagemaker/__init__. This provides a Python API
      (CLI targets cannot easily be called from Python) usable e.g. from notebooks.
4. Translated some of sh script to build and push image to ECR to Python.  
5. Added test for mlflow sagemaker run-local.
  • Loading branch information
tomasatdatabricks authored and mateiz committed Jun 21, 2018
1 parent 7185dda commit ed52e68
Show file tree
Hide file tree
Showing 28 changed files with 847 additions and 422 deletions.
7 changes: 7 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
sudo: true
language: python

services:
- docker

matrix:
include:
- python: 2.7
Expand All @@ -15,6 +19,7 @@ install:
else
wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh;
fi

- bash miniconda.sh -b -p $HOME/miniconda
- export PATH="$HOME/miniconda/bin:$PATH"
- hash -r
Expand All @@ -26,9 +31,11 @@ install:
- pip install --upgrade pip
- pip install .
- pip install -r dev-requirements.txt
- export MLFLOW_HOME=$(pwd)
script:
- pip list
- which mlflow
- echo $MLFLOW_HOME
- tox
- cd mlflow/server/js
- npm i
Expand Down
9 changes: 1 addition & 8 deletions mlflow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'python'

# pylint: disable=wrong-import-position
import mlflow.projects as projects # noqa
import mlflow.projects as projects # noqa
import mlflow.tracking as tracking # noqa

log_param = tracking.log_param
Expand All @@ -20,14 +20,7 @@
get_tracking_uri = tracking.get_tracking_uri
create_experiment = tracking.create_experiment


run = projects.run


# get project relative path
def _relpath(*rel_path_elms):
return os.path.join(os.path.abspath(os.path.dirname(__file__)), *rel_path_elms)


__all__ = ["log_param", "log_metric", "log_artifacts", "log_artifact", "active_run",
"start_run", "end_run", "get_artifact_uri", "set_tracking_uri", "create_experiment"]
131 changes: 131 additions & 0 deletions mlflow/azureml/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
from __future__ import print_function


import os
import shutil

import mlflow
from mlflow import pyfunc
from mlflow.models import Model
from mlflow.tracking import _get_model_log_dir
from mlflow.utils.logging_utils import eprint
from mlflow.utils.file_utils import TempDir
from mlflow.version import VERSION as mlflow_version


def deploy(app_name, model_path, run_id, mlflow_home):
    """
    Deploy an MLflow model to Azure ML.

    NOTE: This command must be run from a correctly initialized Azure ML
    environment — at the moment that means a console launched from the
    Azure ML Workbench. The caller is responsible for setting up the
    Azure ML environment and accounts.

    NOTE: Azure ML cannot handle arbitrary Conda environments; in particular
    the Python version appears to be fixed. If the model carries a Conda
    environment created outside of Azure ML, it may need to be edited to
    work with Azure ML.

    :param app_name: Name of the deployed application (Azure ML service).
    :param model_path: Local path to the model, or a run-relative path when
                       ``run_id`` is given.
    :param run_id: If provided, used to retrieve the model logged with MLflow.
    :param mlflow_home: If set, the development copy of MLflow at this path
                        is packaged with the model instead of the pip release.
    """
    model_dir = _get_model_log_dir(model_path, run_id) if run_id else model_path
    model_dir = os.path.abspath(model_dir)
    with TempDir(chdr=True, remove_on_exit=True):
        cmd = _export(app_name, model_dir, mlflow_home=mlflow_home)
        eprint("executing", '"{}"'.format(cmd))
        # os.system (rather than subprocess) because all azureml commands
        # currently have to run in the same shell the user launched from the
        # Azure ML Workbench app; revisit once a Python API / general CLI exists.
        os.system(cmd)


def export(output, model_path, run_id, mlflow_home):
    """
    Export an MLflow model as an Azure ML compatible model ready to be deployed.

    Writes everything needed to deploy on Azure ML into ``output``, including a
    ``create_service.sh`` script containing the deployment command.

    NOTE: This command does not need an Azure ML environment to run.

    NOTE: Azure ML cannot handle arbitrary Conda environments. If the model
    carries a Conda environment created outside of Azure ML, it may need to be
    edited.

    :param output: Folder the model is exported into; must not already exist.
    :param model_path: Local path to the model, or a run-relative path when
                       ``run_id`` is given.
    :param run_id: If provided, used to retrieve the model logged with MLflow.
    :param mlflow_home: If set, the development copy of MLflow at this path
                        is packaged with the model instead of the pip release.
    """
    out_dir = os.path.abspath(output)
    if os.path.exists(out_dir):
        raise Exception("output folder {} already exists".format(out_dir))
    os.mkdir(out_dir)
    model_dir = _get_model_log_dir(model_path, run_id) if run_id else model_path
    model_dir = os.path.abspath(model_dir)
    previous_dir = os.path.abspath(os.getcwd())
    # _export writes score.py / requirements.txt / the model copy into the
    # current working directory, so temporarily switch into the output folder.
    os.chdir(out_dir)
    try:
        cmd = _export("$1", model_dir, mlflow_home=mlflow_home)
        script_lines = ["#! /bin/sh", "cd {}".format(out_dir), cmd, ""]
        with open("create_service.sh", "w") as script:
            script.write("\n".join(script_lines))
    finally:
        os.chdir(previous_dir)


def _export(app_name, model_path, mlflow_home):
    """
    Materialize the Azure ML deployment artifacts in the current directory and
    return the ``az ml service create`` command line that deploys them.

    Writes ``score.py`` (the scoring entry point — Azure ML requires the main
    module to be in the current directory), ``requirements.txt`` (pinning the
    mlflow dependency), and a copy of the model under ``model/``.
    """
    conf = _load_conf(model_path)
    with open("score.py", "w") as score_file:
        score_file.write(SCORE_SRC)

    if mlflow_home:
        eprint("MLFLOW_HOME =", mlflow_home)
        # Ship the local development copy of mlflow alongside the model and
        # install it editable from inside the container.
        mlflow_dir = mlflow.utils.file_utils._copy_project(src_path=mlflow_home, dst_path="./")
        deps = "-d {}".format(mlflow_dir)
        mlflow_dep = "-e /var/azureml-app/{}".format(mlflow_dir)
    else:
        deps = ""
        mlflow_dep = "mlflow=={}".format(mlflow_version)

    with open("requirements.txt", "w") as req_file:
        req_file.write(mlflow_dep + "\n")

    shutil.copytree(src=model_path, dst="model")

    if pyfunc.ENV in conf:
        env = "-c {}".format(os.path.join("model", conf[pyfunc.ENV]))
    else:
        env = ""
    cmd = ("az ml service create realtime -n {name} "
           "--model-file model -f score.py {conda_env} {deps} -r python -p requirements.txt")
    return cmd.format(name=app_name, conda_env=env, deps=deps)


def _load_conf(path):
    """Return the pyfunc flavor configuration of the MLflow model at ``path``.

    Raises if the model does not declare the pyfunc flavor, since that is the
    only format this exporter supports.
    """
    flavors = Model.load(os.path.join(os.path.abspath(path), "MLmodel")).flavors
    if pyfunc.FLAVOR_NAME not in flavors:
        raise Exception("Currently only supports pyfunc format.")
    return flavors[pyfunc.FLAVOR_NAME]


SCORE_SRC = """
import pandas as pd
from mlflow.pyfunc import load_pyfunc
from mlflow.utils import get_jsonable_obj
def init():
global model
model = load_pyfunc("model")
def run(s):
input_df = pd.read_json(s, orient="records")
return get_jsonable_obj(model.predict(input_df))
"""
125 changes: 27 additions & 98 deletions mlflow/azureml/cli.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
"""
CLI for azureml module.
"""
from __future__ import print_function

import os
import shutil

import click

from mlflow import pyfunc
import mlflow
import mlflow.azureml

from mlflow.models import Model
from mlflow.tracking import _get_model_log_dir

from mlflow.utils.file_utils import TempDir
from mlflow.utils import cli_args


@click.group("azureml")
Expand All @@ -23,110 +23,39 @@ def commands():
@click.option("--app-name", "-n", default=None,
help="The application name under which should this model be deployed. "
"Translates to service name on Azure ML", required=True)
@click.option("--model-path", "-m", default=None,
help="Path to the model. The path is relative to the run with the given run_id or "
"local filesystem path without run_id.", required=True)
@click.option("--run_id", "-r", default=None, help="Id of the MLflow run that contains the model.",
required=False)
def deploy(app_name, model_path, run_id):
@cli_args.MODEL_PATH
@cli_args.RUN_ID
@cli_args.MLFLOW_HOME
def deploy(app_name, model_path, run_id, mlflow_home):
"""Deploy MLflow model to Azure ML.
This command will export the MLflow model into an Azure ML compatible format and create a
service serving this model.
NOTE: This command is to be called from correctly initialized Azure ML environment.
At the moment this means it has to be run from console launched from Azure ML Workbench.
Caller is responsible for setting up Azure ML environment and accounts.
NOTE: This command is to be called from correctly initialized Azure ML environment.
At the moment this means it has to be run from console launched from Azure ML Workbench.
Caller is responsible for setting up Azure ML environment and accounts.
NOTE: Azure ML can not handle any Conda environment. In particular python version seems to be
fixed. If the model contains Conda environment and it has been trained outside of Azure
ML, the Conda environment might need to be edited to work with Azure ML.
NOTE: Azure ML can not handle any Conda environment. In particular python version seems to be
fixed. If the model contains Conda environment and it has been trained outside of Azure
ML, the Conda environment might need to be edited to work with Azure ML.
"""
if run_id:
model_path = _get_model_log_dir(model_path, run_id)
model_path = os.path.abspath(model_path)
with TempDir(chdr=True, remove_on_exit=True):
exec_str = _export(app_name, model_path, "model")
print("executing", '"{}"'.format(exec_str))
# Use os.system instead of subprocess due to the fact that currently all azureml commands
# have to be called within the same shell (launched from azureml workbench app by the user).
# We can change this once there is a python api (or general cli) available.
os.system(exec_str)
mlflow.azureml.deploy(app_name=app_name, model_path=model_path, run_id=run_id,
mlflow_home=os.path.abspath(mlflow_home) if mlflow_home else None)


@commands.command("export")
@click.option("--output", "-o", default=None, help="Output directory.", required=True)
@click.option("--model-path", "-m", default=None,
help="Path to the model. The path is relative to the run with the given run_id or "
"local filesystem path without run_id.", required=True)
@click.option("--run_id", "-r", default=None, help="Id of the MLflow run that contains the model.",
required=False)
def export(output, model_path, run_id):
@cli_args.MODEL_PATH
@cli_args.RUN_ID
@cli_args.MLFLOW_HOME
def export(output, model_path, run_id, mlflow_home):
"""Export MLflow model as Azure ML compatible model ready to be deployed.
Export MLflow model out with everything needed to deploy on Azure ML.
Output includes sh script with command to deploy the generated model to Azure ML.
The generated model has no dependency on MLflow.
NOTE: This commnand does not need Azure ML environment to run.
NOTE: This command does not need Azure ML environment to run.
NOTE: Azure ML can not handle any Conda environment. In particular python version seems to be
fixed. If the model contains Conda environment and it has been trained outside of Azure
ML, the Conda environment might need to be edited.
NOTE: Azure ML can not handle any Conda environment. If the model contains Conda environment
and it has been trained outside of Azure ML, the Conda environment might need to be edited.
"""
output = os.path.abspath(output)
if os.path.exists(output):
raise Exception("output folder {} already exists".format(output))
os.mkdir(output)
if run_id:
model_path = _get_model_log_dir(model_path, run_id)
model_path = os.path.abspath(model_path)
curr_dir = os.path.abspath(os.getcwd())
os.chdir(output)
try:
exec_str = _export("$1", model_path, os.path.basename(output))
with open("create_service.sh", "w") as f:
f.write("\n".join(["#! /bin/sh", "cd {}".format(output), exec_str, ""]))
finally:
os.chdir(curr_dir)


def _export(app_name, model_path, dst_model_path):
conf = _load_conf(model_path)
score_py = "score.py" # NOTE: azure ml requires the main module to be in the current directory
loader_src = pyfunc.get_module_loader_src(model_path, dst_model_path)
with open(score_py, "w") as f:
f.write(SCORE_SRC.format(loader=loader_src))
shutil.copytree(src=model_path, dst=dst_model_path)
deps = ""
env = "-c {}".format(os.path.join(dst_model_path, conf[pyfunc.ENV])) \
if pyfunc.ENV in conf else ""
cmd = "az ml service create realtime -n {name} " + \
"--model-file {path} -f {score} {conda_env} {deps} -r python "
return cmd.format(name=app_name, path=dst_model_path, score=score_py, conda_env=env, deps=deps)


def _load_conf(path):
path = os.path.abspath(path)
model = Model.load(os.path.join(path, "MLmodel"))
if pyfunc.FLAVOR_NAME not in model.flavors:
raise Exception("Currently only supports pyfunc format.")
return model.flavors[pyfunc.FLAVOR_NAME]


SCORE_SRC = """
import pandas as pd
import json
{loader}
def init():
global model
model = load_pyfunc()
def run(s):
input_df = pd.read_json(s, orient="records")
pred = model.predict(input_df)
return json.dumps(pred.tolist())
"""
mlflow.azureml.export(output=output, model_path=model_path, run_id=run_id,
mlflow_home=os.path.abspath(mlflow_home) if mlflow_home else None)
4 changes: 3 additions & 1 deletion mlflow/pyfunc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
The convention for pyfunc models is to have a predict method or function with the following
signature
predict(data: pandas.DataFrame) -> pandas.DataFrame
predict(data: pandas.DataFrame) -> numpy.ndarray | pandas.Series | pandas.DataFrame
This convention is relied upon by other mlflow components.
Expand Down Expand Up @@ -154,6 +154,8 @@ def spark_udf(spark, path, run_id=None, result_type="double"):
Args:
spark (SparkSession): a SparkSession object
path (str): A path containing a pyfunc model.
run_id: Id of the run that produced this model.
If provided, run_id is used to retrieve the model logged with mlflow.
result_type (str): Spark UDF type returned by the model's prediction method. Default double
"""

Expand Down
Loading

0 comments on commit ed52e68

Please sign in to comment.