Skip to content

Commit

Permalink
Add experimental CLI command for building docker image capable of ser…
Browse files Browse the repository at this point in the history
…ving an MLflow model (mlflow#1329)

* WIP

* WIP

* WIP

* WIP

* WIP

* Fix lint, test also seems to pass. Need to refactor docker logic & add more tests

* Some refactoring

* Fix test

* WIP. Refactored build_image into a method within the FlavorBackend interface to allow
extending the build-docker command to other backends in the future

* Refactor utils into common module

* Fix lint

* Make 'serve' the default command in docker image

* Make 'serve' the default command in docker image

* Remove flavor arg

* some cleanups from self-review pass

* Test updates

* Fix test

* Address some review comments + improve test coverage a bit

* Minor updates.

* Minor update.

* Minor update.

* Minor update.

* Minor fixes.

* nits

* Minor fix.

* Run the new test

* Added missing refactored files.

* fixes

* Fix.

* Fix test.

* Mark Sagemaker tests back as release.

* Test fix

* container fix - conda env needs to be copied from the read only directory in sagemaker.
  • Loading branch information
smurching authored and tomasatdatabricks committed Jun 1, 2019
1 parent 4aa1d21 commit 97c9b9d
Show file tree
Hide file tree
Showing 18 changed files with 482 additions and 291 deletions.
14 changes: 14 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -1,8 +1,22 @@
.git
mlruns
docs
apidocs
mlflow.Rcheck
outputs
examples
travis
tests
node_modules
coverage
build
npm-debug.log*
yarn-debug.log*
yarn-error.log*
__pycache__
.*
~*
*.swp
*.pyc


7 changes: 7 additions & 0 deletions mlflow/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,10 @@ def can_score_model(self):
:return: True if this flavor backend can be applied in the current environment.
"""
pass

def can_build_image(self):
    """
    :return: True if this flavor defines a `build_image` method usable for building a docker
             container that serves the model, False otherwise.
    """
    # Look the attribute up on the class (not the instance) so that the base-class stub,
    # if any, is what gets inspected; missing attribute yields None, which is not callable.
    build_image_attr = getattr(self.__class__, 'build_image', None)
    return callable(build_image_attr)
40 changes: 38 additions & 2 deletions mlflow/models/cli.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
import click
import os
import posixpath

from mlflow.models import Model
Expand Down Expand Up @@ -77,14 +78,49 @@ def predict(model_uri, input_path, output_path, content_type, json_format, no_co
json_format=json_format)


@commands.command("build-docker")
@cli_args.MODEL_URI
@click.option("--name", "-n", default="mlflow-pyfunc-servable",
              help="Name to use for built image")
@cli_args.INSTALL_MLFLOW
def build_docker(model_uri, name, install_mlflow):
    """
    **EXPERIMENTAL**: Builds a Docker image whose default entrypoint serves the specified MLflow
    model at port 8080 within the container, using the 'python_function' flavor.

    For example, the following command builds a docker image named 'my-image-name' that serves the
    model from run 'some-run-uuid' at run-relative artifact path 'my-model':

    .. code:: bash

        mlflow models build-docker -m "runs:/some-run-uuid/my-model" -n "my-image-name"

    We can then serve the model, exposing it at port 5001 on the host via:

    .. code:: bash

        docker run -p 5001:8080 "my-image-name"

    See https://www.mlflow.org/docs/latest/python_api/mlflow.pyfunc.html for more information on the
    'python_function' flavor.

    This command is experimental (may be changed or removed in a future release without warning)
    and does not guarantee that the arguments nor format of the Docker container will remain the
    same.
    """
    # MLFLOW_HOME (if set) points at a local MLflow source checkout to install into the image
    # instead of the released pip wheel.
    mlflow_home = os.environ.get("MLFLOW_HOME", None)
    # FIX: pass `build_docker=True` (the keyword declared by get_flavor_backend) rather than the
    # misspelled `docker_build=True`, which fell into **kwargs and leaked into the backend
    # constructor without influencing backend selection.
    _get_flavor_backend(model_uri, build_docker=True).build_image(model_uri, name,
                                                                  mlflow_home=mlflow_home,
                                                                  install_mlflow=install_mlflow)


def _get_flavor_backend(model_uri, **kwargs):
    """
    Download the model's MLmodel configuration and select a suitable flavor backend for it.

    :param model_uri: URI of the MLflow model (e.g. 'runs:/<run-id>/<artifact-path>').
    :param kwargs: Forwarded to ``get_flavor_backend`` (e.g. ``build_docker``, ``no_conda``).
    :return: The selected flavor backend.
    :raises Exception: If no registered backend supports any of the model's flavors.
    """
    with TempDir() as tmp:
        # Only the MLmodel config file is needed to pick a backend; avoid downloading the
        # full model artifact directory.
        local_path = _download_artifact_from_uri(posixpath.join(model_uri, "MLmodel"),
                                                 output_path=tmp.path())
        model = Model.load(local_path)
        flavor_name, flavor_backend = get_flavor_backend(model, **kwargs)
        if flavor_backend is None:
            raise Exception("No suitable flavor backend was found for the model.")
        # FIX: log only after confirming a backend exists (the block previously contained a
        # duplicated "Selected backend" log statement before the None check).
        _logger.info("Selected backend for flavor '%s'", flavor_name)
        return flavor_backend
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Initialize the environment and start model serving on Sagemaker or local Docker container.
Initialize the environment and start model serving in a Docker container.
To be executed only during the model deployment.
Expand All @@ -8,11 +8,10 @@

import multiprocessing
import os
import shutil
import signal
import shutil
from subprocess import check_call, Popen
import sys
import yaml

from pkg_resources import resource_filename

Expand All @@ -21,10 +20,12 @@

from mlflow import pyfunc, mleap
from mlflow.models import Model
from mlflow.models.docker_utils import DISABLE_ENV_CREATION
from mlflow.version import VERSION as MLFLOW_VERSION

MODEL_PATH = "/opt/ml/model"


DEPLOYMENT_CONFIG_KEY_FLAVOR_NAME = "MLFLOW_DEPLOYMENT_FLAVOR_NAME"

DEFAULT_SAGEMAKER_SERVER_PORT = 8080
Expand All @@ -50,19 +51,6 @@ def _init(cmd):
args=str(sys.argv)))


def _server_dependencies_cmds():
    """
    Build the shell commands that install the packages required to serve the model with MLflow —
    packages outside of the user-provided environment, plus MLflow itself.
    :return: List of commands.
    """
    # TODO: Should we reinstall MLflow? What if there is MLflow in the user's conda environment?
    if _container_includes_mlflow_source():
        mlflow_install_cmd = "pip install /opt/mlflow/."
    else:
        mlflow_install_cmd = "pip install mlflow=={}".format(MLFLOW_VERSION)
    return ["conda install gunicorn", "conda install gevent", mlflow_install_cmd]


def _serve():
"""
Serve the model.
Expand All @@ -86,22 +74,56 @@ def _serve():
raise Exception("This container only supports models with the MLeap or PyFunc flavors.")


def _install_pyfunc_deps(model_path=None, install_mlflow=False):
    """
    Creates a conda env for serving the model at the specified path and installs almost all serving
    dependencies into the environment - MLflow is not installed as it's not available via conda.

    :param model_path: Optional local path to the model directory containing an MLmodel file.
        If None, no model-specific conda environment is created.
    :param install_mlflow: If True (and a custom env was created), also install MLflow into
        that environment, from source if the container bundles it, otherwise from pip.
    """
    # If model is a pyfunc model, create its conda env (even if it also has mleap flavor)
    has_env = False
    if model_path:
        model_config_path = os.path.join(model_path, "MLmodel")
        model = Model.load(model_config_path)
        # NOTE: this differs from _serve cause we always activate the env even if you're serving
        # an mleap model
        if pyfunc.FLAVOR_NAME not in model.flavors:
            # Not a pyfunc model: nothing to create, and no server deps needed either.
            return
        conf = model.flavors[pyfunc.FLAVOR_NAME]
        if pyfunc.ENV in conf:
            print("creating and activating custom environment")
            env = conf[pyfunc.ENV]
            # Copy the conda env spec out of the (read-only on SageMaker) model directory so
            # conda can read it from a writable location.
            env_path_dst = os.path.join("/opt/mlflow/", env)
            env_path_dst_dir = os.path.dirname(env_path_dst)
            if not os.path.exists(env_path_dst_dir):
                os.makedirs(env_path_dst_dir)
            shutil.copyfile(os.path.join(MODEL_PATH, env), env_path_dst)
            conda_create_model_env = "conda env create -n custom_env -f {}".format(env_path_dst)
            # Run through bash so conda's shell integration works; nonzero exit is fatal.
            if Popen(["bash", "-c", conda_create_model_env]).wait() != 0:
                raise Exception("Failed to create model environment.")
            has_env = True
    # Subsequent installs must target the custom env (when one exists), so prefix an activate.
    activate_cmd = ["source /miniconda/bin/activate custom_env"] if has_env else []
    # NB: install gunicorn[gevent] from pip rather than from conda because gunicorn is already a
    # dependency of mlflow on pip and we expect mlflow to be part of the environment.
    install_server_deps = ["pip install gunicorn[gevent]"]
    if Popen(["bash", "-c", " && ".join(activate_cmd + install_server_deps)]).wait() != 0:
        raise Exception("Failed to install serving dependencies into the model environment.")
    if has_env and install_mlflow:
        # Prefer installing MLflow from the bundled source tree when present (e.g. dev images),
        # otherwise pin to the container's MLflow version from pip.
        install_mlflow_cmd = [
            "pip install /opt/mlflow/." if _container_includes_mlflow_source()
            else "pip install mlflow=={}".format(MLFLOW_VERSION)
        ]
        if Popen(["bash", "-c", " && ".join(activate_cmd + install_mlflow_cmd)]).wait() != 0:
            raise Exception("Failed to install mlflow into the model environment.")


def _serve_pyfunc(model):
conf = model.flavors[pyfunc.FLAVOR_NAME]
bash_cmds = []
if pyfunc.ENV in conf:
print("activating custom environment")
env = conf[pyfunc.ENV]
env_path_dst = os.path.join("/opt/mlflow/", env)
env_path_dst_dir = os.path.dirname(env_path_dst)
if not os.path.exists(env_path_dst_dir):
os.makedirs(env_path_dst_dir)
# TODO: should we test that the environment does not include any of the server dependencies?
# Those are gonna be reinstalled. should probably test this on the client side
shutil.copyfile(os.path.join(MODEL_PATH, env), env_path_dst)
os.system("conda env create -n custom_env -f {}".format(env_path_dst))
bash_cmds += ["source /miniconda/bin/activate custom_env"] + _server_dependencies_cmds()
nginx_conf = resource_filename(mlflow.sagemaker.__name__, "container/scoring_server/nginx.conf")
if not os.environ.get(DISABLE_ENV_CREATION) == "true":
_install_pyfunc_deps(MODEL_PATH, install_mlflow=True)
bash_cmds += ["source /miniconda/bin/activate custom_env"]
nginx_conf = resource_filename(mlflow.models.__name__, "container/scoring_server/nginx.conf")
nginx = Popen(['nginx', '-c', nginx_conf])
# link the log streams to stdout/err so they will be logged to the container logs
check_call(['ln', '-sf', '/dev/stdout', '/var/log/nginx/access.log'])
Expand All @@ -111,7 +133,7 @@ def _serve_pyfunc(model):
os.system("python -V")
os.system('python -c"from mlflow.version import VERSION as V; print(V)"')
cmd = ("gunicorn --timeout 60 -k gevent -b unix:/tmp/gunicorn.sock -w {nworkers} " +
"mlflow.sagemaker.container.scoring_server.wsgi:app").format(nworkers=cpu_count)
"mlflow.models.container.scoring_server.wsgi:app").format(nworkers=cpu_count)
bash_cmds.append(cmd)
gunicorn = Popen(["/bin/bash", "-c", " && ".join(bash_cmds)])
signal.signal(signal.SIGTERM, lambda a, b: _sigterm_handler(pids=[nginx.pid, gunicorn.pid]))
Expand Down
109 changes: 109 additions & 0 deletions mlflow/models/docker_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
import os
from subprocess import Popen, PIPE, STDOUT
import logging

import mlflow
import mlflow.version
from mlflow.utils.file_utils import TempDir, _copy_project
from mlflow.utils.logging_utils import eprint

_logger = logging.getLogger(__name__)

DISABLE_ENV_CREATION = "MLFLOW_DISABLE_ENV_CREATION"

_DOCKERFILE_TEMPLATE = """
# Build an image that can serve mlflow models.
FROM ubuntu:16.04
RUN apt-get -y update && apt-get install -y --no-install-recommends \
wget \
curl \
nginx \
ca-certificates \
bzip2 \
build-essential \
cmake \
openjdk-8-jdk \
git-core \
maven \
&& rm -rf /var/lib/apt/lists/*
# Download and setup miniconda
RUN curl https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh >> miniconda.sh
RUN bash ./miniconda.sh -b -p /miniconda; rm ./miniconda.sh;
ENV PATH="/miniconda/bin:$PATH"
ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
# Set up the program in the image
WORKDIR /opt/mlflow
{install_mlflow}
{custom_setup_steps}
{entrypoint}
"""


def _get_mlflow_install_step(dockerfile_context_dir, mlflow_home):
    """
    Return the Dockerfile commands that install MLflow into the image, given a Docker context
    directory and an optional MLflow source directory.
    """
    if not mlflow_home:
        # No source checkout: install the released wheel from pip and fetch the matching
        # scoring JARs from Maven Central.
        return (
            "RUN pip install mlflow=={version}\n"
            "RUN mvn "
            " --batch-mode dependency:copy"
            " -Dartifact=org.mlflow:mlflow-scoring:{version}:pom"
            " -DoutputDirectory=/opt/java\n"
            "RUN mvn "
            " --batch-mode dependency:copy"
            " -Dartifact=org.mlflow:mlflow-scoring:{version}:jar"
            " -DoutputDirectory=/opt/java/jars"
        ).format(version=mlflow.version.VERSION)
    # Source install: copy the project into the build context, pip-install it, and build the
    # scoring-server JAR from source inside the image.
    context_src_dir = _copy_project(
        src_path=mlflow_home, dst_path=dockerfile_context_dir)
    return (
        "COPY {mlflow_dir} /opt/mlflow\n"
        "RUN pip install /opt/mlflow\n"
        "RUN cd /opt/mlflow/mlflow/java/scoring && "
        "mvn --batch-mode package -DskipTests && "
        "mkdir -p /opt/java/jars && "
        "mv /opt/mlflow/mlflow/java/scoring/target/"
        "mlflow-scoring-*-with-dependencies.jar /opt/java/jars\n"
    ).format(mlflow_dir=context_src_dir)


def _build_image(image_name, entrypoint, mlflow_home=None, custom_setup_steps_hook=None):
    """
    Build an MLflow Docker image that can be used to serve a model.
    The image is built locally and it requires Docker to run.

    :param image_name: Docker image name.
    :param entrypoint: String containing ENTRYPOINT directive for docker image
    :param mlflow_home: (Optional) Path to a local copy of the MLflow GitHub repository.
                        If specified, the image will install MLflow from this directory.
                        If None, it will install MLflow from pip.
    :param custom_setup_steps_hook: (Optional) Single-argument function that takes the string path
           of a dockerfile context directory and returns a string containing Dockerfile commands to
           run during the image build step.
    :raises RuntimeError: If the docker build exits with a nonzero status.
    """
    mlflow_home = os.path.abspath(mlflow_home) if mlflow_home else None
    with TempDir() as tmp:
        cwd = tmp.path()
        install_mlflow = _get_mlflow_install_step(cwd, mlflow_home)
        custom_setup_steps = custom_setup_steps_hook(cwd) if custom_setup_steps_hook else ""
        with open(os.path.join(cwd, "Dockerfile"), "w") as f:
            f.write(_DOCKERFILE_TEMPLATE.format(
                install_mlflow=install_mlflow, custom_setup_steps=custom_setup_steps,
                entrypoint=entrypoint))
        _logger.info("Building docker image with name %s", image_name)
        # FIX: removed leftover debug statement `os.system('find {cwd}/')` that dumped the
        # build-context directory listing to stdout on every build.
        proc = Popen(["docker", "build", "-t", image_name, "-f", "Dockerfile", "."],
                     cwd=cwd,
                     stdout=PIPE,
                     stderr=STDOUT,
                     universal_newlines=True)
        # Stream docker build output to stderr so the user can follow progress live.
        for x in iter(proc.stdout.readline, ""):
            eprint(x, end='')
        # FIX: previously a failed docker build was silently ignored; fail loudly instead.
        if proc.wait() != 0:
            raise RuntimeError("Docker build failed for image '{}'".format(image_name))
9 changes: 5 additions & 4 deletions mlflow/models/flavor_backend_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
Registry of supported flavor backends. Contains a mapping of flavors to flavor backends. This
mapping is used to select suitable flavor when deploying generic MLflow models.
Flavor backend can deploy particular flavor locally to generate predictions, deploy as a local
REST api endpoint, or build a docker image for serving the model locally or remotely.
Not all flavors have a flavor backend.
"""
import mlflow.pyfunc as pyfunc
from mlflow.pyfunc.backend import PyFuncBackend
Expand All @@ -16,10 +17,10 @@
}


def get_flavor_backend(model, build_docker=True, **kwargs):
    """
    Select a flavor backend for the given model.

    :param model: Model configuration whose ``flavors`` mapping is scanned in order.
    :param build_docker: If True, a backend capable of building a serving docker image is
        acceptable even when it cannot score the model in the current environment.
    :param kwargs: Forwarded to the selected flavor backend's constructor.
    :return: Tuple ``(flavor_name, backend)`` for the first suitable flavor, or
        ``(None, None)`` if no registered backend is suitable.
    """
    for flavor_name, flavor_config in model.flavors.items():
        if flavor_name in _flavor_backends:
            backend = _flavor_backends[flavor_name](flavor_config, **kwargs)
            # FIX: the rendered block contained duplicated old/new diff lines; this is the
            # new-version logic with the mixed and/or condition parenthesized so the intended
            # precedence ("docker capability only counts when requested") is explicit.
            if (build_docker and backend.can_build_image()) or backend.can_score_model():
                return flavor_name, backend
    return None, None
Loading

0 comments on commit 97c9b9d

Please sign in to comment.