Skip to content

Commit

Permalink
Add experimental CLI command for building docker image capable of ser…
Browse files Browse the repository at this point in the history
…ving an MLflow model (mlflow#1329)

* WIP

* WIP

* WIP

* WIP

* WIP

* Fix lint, test also seems to pass. Need to refactor docker logic & add more tests

* Some refactoring

* Fix test

* WIP. Refactored build_image into a method within the FlavorBackend interface to allow
extending the build-docker command to other backends in the future

* Refactor utils into common module

* Fix lint

* Make 'serve' the default command in docker image

* Make 'serve' the default command in docker image

* Remove flavor arg

* some cleanups from self-review pass

* Test updates

* Fix test

* Address some review comments + improve test coverage a bit

* Minor updates.

* Minor update.

* Minor update.

* Minor update.

* Minor fixes.

* nits

* Minor fix.

* Run the new test

* Added missing refactored files.

* fixes

* Fix.

* Fix test.

* Mark Sagemaker tests back as release.

* Test fix

* container fix - conda env needs to be copied from the read only directory in sagemaker.
  • Loading branch information
smurching authored and tomasatdatabricks committed Jun 1, 2019
1 parent 4aa1d21 commit 97c9b9d
Show file tree
Hide file tree
Showing 18 changed files with 482 additions and 291 deletions.
14 changes: 14 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -1,8 +1,22 @@
.git
mlruns
docs
apidocs
mlflow.Rcheck
outputs
examples
travis
tests
node_modules
coverage
build
npm-debug.log*
yarn-debug.log*
yarn-error.log*
__pycache__
.*
~*
*.swp
*.pyc


7 changes: 7 additions & 0 deletions mlflow/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,10 @@ def can_score_model(self):
:return: True if this flavor backend can be applied in the current environment.
"""
pass

def can_build_image(self):
    """
    :return: True if this flavor defines a `build_image` method usable for building a docker
             container that serves the model, False otherwise.
    """
    # Look the attribute up on the class (not the instance) so that the base-class stub,
    # if any, is what gets inspected; missing attribute yields None, which is not callable.
    build_image_attr = getattr(self.__class__, 'build_image', None)
    return callable(build_image_attr)
40 changes: 38 additions & 2 deletions mlflow/models/cli.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
import click
import os
import posixpath

from mlflow.models import Model
Expand Down Expand Up @@ -77,14 +78,49 @@ def predict(model_uri, input_path, output_path, content_type, json_format, no_co
json_format=json_format)


@commands.command("build-docker")
@cli_args.MODEL_URI
@click.option("--name", "-n", default="mlflow-pyfunc-servable",
              help="Name to use for built image")
@cli_args.INSTALL_MLFLOW
def build_docker(model_uri, name, install_mlflow):
    """
    **EXPERIMENTAL**: Builds a Docker image whose default entrypoint serves the specified MLflow
    model at port 8080 within the container, using the 'python_function' flavor.

    For example, the following command builds a docker image named 'my-image-name' that serves the
    model from run 'some-run-uuid' at run-relative artifact path 'my-model':

    .. code:: bash

        mlflow models build-docker -m "runs:/some-run-uuid/my-model" -n "my-image-name"

    We can then serve the model, exposing it at port 5001 on the host via:

    .. code:: bash

        docker run -p 5001:8080 "my-image-name"

    See https://www.mlflow.org/docs/latest/python_api/mlflow.pyfunc.html for more information on the
    'python_function' flavor.

    This command is experimental (may be changed or removed in a future release without warning)
    and does not guarantee that the arguments nor format of the Docker container will remain the
    same.
    """
    # MLFLOW_HOME (if set) points at a local MLflow source checkout to install into the image
    # instead of the released pip wheel.
    mlflow_home = os.environ.get("MLFLOW_HOME", None)
    # FIX: pass `build_docker=True` (the keyword declared by get_flavor_backend) rather than the
    # misspelled `docker_build=True`, which fell into **kwargs and leaked into the backend
    # constructor without influencing backend selection.
    _get_flavor_backend(model_uri, build_docker=True).build_image(model_uri, name,
                                                                  mlflow_home=mlflow_home,
                                                                  install_mlflow=install_mlflow)


def _get_flavor_backend(model_uri, **kwargs):
    """
    Download the model's MLmodel configuration and select a suitable flavor backend for it.

    :param model_uri: URI of the MLflow model (e.g. 'runs:/<run-id>/<artifact-path>').
    :param kwargs: Forwarded to ``get_flavor_backend`` (e.g. ``build_docker``, ``no_conda``).
    :return: The selected flavor backend.
    :raises Exception: If no registered backend supports any of the model's flavors.
    """
    with TempDir() as tmp:
        # Only the MLmodel config file is needed to pick a backend; avoid downloading the
        # full model artifact directory.
        local_path = _download_artifact_from_uri(posixpath.join(model_uri, "MLmodel"),
                                                 output_path=tmp.path())
        model = Model.load(local_path)
        flavor_name, flavor_backend = get_flavor_backend(model, **kwargs)
        if flavor_backend is None:
            raise Exception("No suitable flavor backend was found for the model.")
        # FIX: log only after confirming a backend exists (the block previously contained a
        # duplicated "Selected backend" log statement before the None check).
        _logger.info("Selected backend for flavor '%s'", flavor_name)
        return flavor_backend
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
Initialize the environment and start model serving on Sagemaker or local Docker container.
Initialize the environment and start model serving in a Docker container.
To be executed only during the model deployment.
Expand All @@ -8,11 +8,10 @@

import multiprocessing
import os
import shutil
import signal
import shutil
from subprocess import check_call, Popen
import sys
import yaml

from pkg_resources import resource_filename

Expand All @@ -21,10 +20,12 @@

from mlflow import pyfunc, mleap
from mlflow.models import Model
from mlflow.models.docker_utils import DISABLE_ENV_CREATION
from mlflow.version import VERSION as MLFLOW_VERSION

MODEL_PATH = "/opt/ml/model"


DEPLOYMENT_CONFIG_KEY_FLAVOR_NAME = "MLFLOW_DEPLOYMENT_FLAVOR_NAME"

DEFAULT_SAGEMAKER_SERVER_PORT = 8080
Expand All @@ -50,19 +51,6 @@ def _init(cmd):
args=str(sys.argv)))


def _server_dependencies_cmds():
    """
    Build the shell commands that install the packages required to serve the model with MLflow —
    packages outside of the user-provided environment, plus MLflow itself.
    :return: List of commands.
    """
    # TODO: Should we reinstall MLflow? What if there is MLflow in the user's conda environment?
    if _container_includes_mlflow_source():
        mlflow_install_cmd = "pip install /opt/mlflow/."
    else:
        mlflow_install_cmd = "pip install mlflow=={}".format(MLFLOW_VERSION)
    return ["conda install gunicorn", "conda install gevent", mlflow_install_cmd]


def _serve():
"""
Serve the model.
Expand All @@ -86,22 +74,56 @@ def _serve():
raise Exception("This container only supports models with the MLeap or PyFunc flavors.")


def _install_pyfunc_deps(model_path=None, install_mlflow=False):
    """
    Creates a conda env for serving the model at the specified path and installs almost all serving
    dependencies into the environment - MLflow is not installed as it's not available via conda.

    :param model_path: Optional local path to the model directory containing an MLmodel file.
        If None, no model-specific conda environment is created.
    :param install_mlflow: If True (and a custom env was created), also install MLflow into
        that environment, from source if the container bundles it, otherwise from pip.
    """
    # If model is a pyfunc model, create its conda env (even if it also has mleap flavor)
    has_env = False
    if model_path:
        model_config_path = os.path.join(model_path, "MLmodel")
        model = Model.load(model_config_path)
        # NOTE: this differs from _serve cause we always activate the env even if you're serving
        # an mleap model
        if pyfunc.FLAVOR_NAME not in model.flavors:
            # Not a pyfunc model: nothing to create, and no server deps needed either.
            return
        conf = model.flavors[pyfunc.FLAVOR_NAME]
        if pyfunc.ENV in conf:
            print("creating and activating custom environment")
            env = conf[pyfunc.ENV]
            # Copy the conda env spec out of the (read-only on SageMaker) model directory so
            # conda can read it from a writable location.
            env_path_dst = os.path.join("/opt/mlflow/", env)
            env_path_dst_dir = os.path.dirname(env_path_dst)
            if not os.path.exists(env_path_dst_dir):
                os.makedirs(env_path_dst_dir)
            shutil.copyfile(os.path.join(MODEL_PATH, env), env_path_dst)
            conda_create_model_env = "conda env create -n custom_env -f {}".format(env_path_dst)
            # Run through bash so conda's shell integration works; nonzero exit is fatal.
            if Popen(["bash", "-c", conda_create_model_env]).wait() != 0:
                raise Exception("Failed to create model environment.")
            has_env = True
    # Subsequent installs must target the custom env (when one exists), so prefix an activate.
    activate_cmd = ["source /miniconda/bin/activate custom_env"] if has_env else []
    # NB: install gunicorn[gevent] from pip rather than from conda because gunicorn is already a
    # dependency of mlflow on pip and we expect mlflow to be part of the environment.
    install_server_deps = ["pip install gunicorn[gevent]"]
    if Popen(["bash", "-c", " && ".join(activate_cmd + install_server_deps)]).wait() != 0:
        raise Exception("Failed to install serving dependencies into the model environment.")
    if has_env and install_mlflow:
        # Prefer installing MLflow from the bundled source tree when present (e.g. dev images),
        # otherwise pin to the container's MLflow version from pip.
        install_mlflow_cmd = [
            "pip install /opt/mlflow/." if _container_includes_mlflow_source()
            else "pip install mlflow=={}".format(MLFLOW_VERSION)
        ]
        if Popen(["bash", "-c", " && ".join(activate_cmd + install_mlflow_cmd)]).wait() != 0:
            raise Exception("Failed to install mlflow into the model environment.")


def _serve_pyfunc(model):
conf = model.flavors[pyfunc.FLAVOR_NAME]
bash_cmds = []
if pyfunc.ENV in conf:
print("activating custom environment")
env = conf[pyfunc.ENV]
env_path_dst = os.path.join("/opt/mlflow/", env)
env_path_dst_dir = os.path.dirname(env_path_dst)
if not os.path.exists(env_path_dst_dir):
os.makedirs(env_path_dst_dir)
# TODO: should we test that the environment does not include any of the server dependencies?
# Those are gonna be reinstalled. should probably test this on the client side
shutil.copyfile(os.path.join(MODEL_PATH, env), env_path_dst)
os.system("conda env create -n custom_env -f {}".format(env_path_dst))
bash_cmds += ["source /miniconda/bin/activate custom_env"] + _server_dependencies_cmds()
nginx_conf = resource_filename(mlflow.sagemaker.__name__, "container/scoring_server/nginx.conf")
if not os.environ.get(DISABLE_ENV_CREATION) == "true":
_install_pyfunc_deps(MODEL_PATH, install_mlflow=True)
bash_cmds += ["source /miniconda/bin/activate custom_env"]
nginx_conf = resource_filename(mlflow.models.__name__, "container/scoring_server/nginx.conf")
nginx = Popen(['nginx', '-c', nginx_conf])
# link the log streams to stdout/err so they will be logged to the container logs
check_call(['ln', '-sf', '/dev/stdout', '/var/log/nginx/access.log'])
Expand All @@ -111,7 +133,7 @@ def _serve_pyfunc(model):
os.system("python -V")
os.system('python -c"from mlflow.version import VERSION as V; print(V)"')
cmd = ("gunicorn --timeout 60 -k gevent -b unix:/tmp/gunicorn.sock -w {nworkers} " +
"mlflow.sagemaker.container.scoring_server.wsgi:app").format(nworkers=cpu_count)
"mlflow.models.container.scoring_server.wsgi:app").format(nworkers=cpu_count)
bash_cmds.append(cmd)
gunicorn = Popen(["/bin/bash", "-c", " && ".join(bash_cmds)])
signal.signal(signal.SIGTERM, lambda a, b: _sigterm_handler(pids=[nginx.pid, gunicorn.pid]))
Expand Down
109 changes: 109 additions & 0 deletions mlflow/models/docker_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
import os
from subprocess import Popen, PIPE, STDOUT
import logging

import mlflow
import mlflow.version
from mlflow.utils.file_utils import TempDir, _copy_project
from mlflow.utils.logging_utils import eprint

_logger = logging.getLogger(__name__)

DISABLE_ENV_CREATION = "MLFLOW_DISABLE_ENV_CREATION"

_DOCKERFILE_TEMPLATE = """
# Build an image that can serve mlflow models.
FROM ubuntu:16.04
RUN apt-get -y update && apt-get install -y --no-install-recommends \
wget \
curl \
nginx \
ca-certificates \
bzip2 \
build-essential \
cmake \
openjdk-8-jdk \
git-core \
maven \
&& rm -rf /var/lib/apt/lists/*
# Download and setup miniconda
RUN curl https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh >> miniconda.sh
RUN bash ./miniconda.sh -b -p /miniconda; rm ./miniconda.sh;
ENV PATH="/miniconda/bin:$PATH"
ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
# Set up the program in the image
WORKDIR /opt/mlflow
{install_mlflow}
{custom_setup_steps}
{entrypoint}
"""


def _get_mlflow_install_step(dockerfile_context_dir, mlflow_home):
    """
    Return the Dockerfile commands that install MLflow into the image, given a Docker context
    directory and an optional MLflow source directory.
    """
    if not mlflow_home:
        # No source checkout: install the released wheel from pip and fetch the matching
        # scoring JARs from Maven Central.
        return (
            "RUN pip install mlflow=={version}\n"
            "RUN mvn "
            " --batch-mode dependency:copy"
            " -Dartifact=org.mlflow:mlflow-scoring:{version}:pom"
            " -DoutputDirectory=/opt/java\n"
            "RUN mvn "
            " --batch-mode dependency:copy"
            " -Dartifact=org.mlflow:mlflow-scoring:{version}:jar"
            " -DoutputDirectory=/opt/java/jars"
        ).format(version=mlflow.version.VERSION)
    # Source install: copy the project into the build context, pip-install it, and build the
    # scoring-server JAR from source inside the image.
    context_src_dir = _copy_project(
        src_path=mlflow_home, dst_path=dockerfile_context_dir)
    return (
        "COPY {mlflow_dir} /opt/mlflow\n"
        "RUN pip install /opt/mlflow\n"
        "RUN cd /opt/mlflow/mlflow/java/scoring && "
        "mvn --batch-mode package -DskipTests && "
        "mkdir -p /opt/java/jars && "
        "mv /opt/mlflow/mlflow/java/scoring/target/"
        "mlflow-scoring-*-with-dependencies.jar /opt/java/jars\n"
    ).format(mlflow_dir=context_src_dir)


def _build_image(image_name, entrypoint, mlflow_home=None, custom_setup_steps_hook=None):
    """
    Build an MLflow Docker image that can be used to serve a model.
    The image is built locally and it requires Docker to run.

    :param image_name: Docker image name.
    :param entrypoint: String containing ENTRYPOINT directive for docker image
    :param mlflow_home: (Optional) Path to a local copy of the MLflow GitHub repository.
                        If specified, the image will install MLflow from this directory.
                        If None, it will install MLflow from pip.
    :param custom_setup_steps_hook: (Optional) Single-argument function that takes the string path
           of a dockerfile context directory and returns a string containing Dockerfile commands to
           run during the image build step.
    :raises RuntimeError: If the docker build exits with a nonzero status.
    """
    mlflow_home = os.path.abspath(mlflow_home) if mlflow_home else None
    with TempDir() as tmp:
        cwd = tmp.path()
        install_mlflow = _get_mlflow_install_step(cwd, mlflow_home)
        custom_setup_steps = custom_setup_steps_hook(cwd) if custom_setup_steps_hook else ""
        with open(os.path.join(cwd, "Dockerfile"), "w") as f:
            f.write(_DOCKERFILE_TEMPLATE.format(
                install_mlflow=install_mlflow, custom_setup_steps=custom_setup_steps,
                entrypoint=entrypoint))
        _logger.info("Building docker image with name %s", image_name)
        # FIX: removed leftover debug statement `os.system('find {cwd}/')` that dumped the
        # build-context directory listing to stdout on every build.
        proc = Popen(["docker", "build", "-t", image_name, "-f", "Dockerfile", "."],
                     cwd=cwd,
                     stdout=PIPE,
                     stderr=STDOUT,
                     universal_newlines=True)
        # Stream docker build output to stderr so the user can follow progress live.
        for x in iter(proc.stdout.readline, ""):
            eprint(x, end='')
        # FIX: previously a failed docker build was silently ignored; fail loudly instead.
        if proc.wait() != 0:
            raise RuntimeError("Docker build failed for image '{}'".format(image_name))
9 changes: 5 additions & 4 deletions mlflow/models/flavor_backend_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
Registry of supported flavor backends. Contains a mapping of flavors to flavor backends. This
mapping is used to select suitable flavor when deploying generic MLflow models.
Flavor backend can deploy particular flavor locally to generate predictions, deploy as a local
REST api endpoint, or build a docker image for serving the model locally or remotely.
Not all flavors have a flavor backend.
"""
import mlflow.pyfunc as pyfunc
from mlflow.pyfunc.backend import PyFuncBackend
Expand All @@ -16,10 +17,10 @@
}


def get_flavor_backend(model, build_docker=True, **kwargs):
    """
    Select a flavor backend for the given model.

    :param model: Model configuration whose ``flavors`` mapping is scanned in order.
    :param build_docker: If True, a backend capable of building a serving docker image is
        acceptable even when it cannot score the model in the current environment.
    :param kwargs: Forwarded to the selected flavor backend's constructor.
    :return: Tuple ``(flavor_name, backend)`` for the first suitable flavor, or
        ``(None, None)`` if no registered backend is suitable.
    """
    for flavor_name, flavor_config in model.flavors.items():
        if flavor_name in _flavor_backends:
            backend = _flavor_backends[flavor_name](flavor_config, **kwargs)
            # FIX: the rendered block contained duplicated old/new diff lines; this is the
            # new-version logic with the mixed and/or condition parenthesized so the intended
            # precedence ("docker capability only counts when requested") is explicit.
            if (build_docker and backend.can_build_image()) or backend.can_score_model():
                return flavor_name, backend
    return None, None
Loading

0 comments on commit 97c9b9d

Please sign in to comment.