minor fixes as follow on for docker based MLprojects (mlflow#819)

* minor fixes as follow-on for Docker-based MLprojects
* review comments; fixing unit test
wolliq · Jan 18, 2019 · 4951259 · 4951259
1 parent d7d6d5d
commit 4951259
Show file tree

Hide file tree

Showing 4 changed files with 17 additions and 15 deletions.
diff --git a/docs/source/projects.rst b/docs/source/projects.rst
@@ -229,13 +229,14 @@ where ``<uri>`` is a Git repository URI or a folder. You can pass Git credential
 Execution on Docker containers
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 You can run projects inside Docker container instead of conda environments. In order to do that 
-you need to specify the ``docker_env`` and ``dockerimage`` atributes in MLProject as described bellow. 
-It simply mounts the local directory of the project as a volume inside container in ``/mlflow/projects/code`` path.
+you need to specify the ``docker_env`` attribute along with its ``image`` attribute in MLProject,
+as described below. The local directory of the project is then mounted as a volume inside the
+container at the ``/mlflow/projects/code`` path.
 
 .. code::
 
     docker_env:
-        dockerimage: mlflow-run-image
+        image: mlflow-run-image
 
 Iterating Quickly
 -----------------

diff --git a/mlflow/projects/__init__.py b/mlflow/projects/__init__.py
@@ -27,7 +27,7 @@
 import mlflow.projects.databricks
 from mlflow.utils import process
 from mlflow.utils.mlflow_tags import MLFLOW_GIT_REPO_URL, MLFLOW_GIT_BRANCH_NAME
-from mlflow.utils.mlflow_tags import MLFLOW_DOCKER
+from mlflow.utils.mlflow_tags import MLFLOW_ENV, MLFLOW_CONDA, MLFLOW_DOCKER
 from mlflow.utils.mlflow_tags import MLFLOW_DOCKER_IMAGE_NAME, MLFLOW_DOCKER_IMAGE_ID
 from mlflow.utils import databricks_utils, file_utils
 from mlflow.utils.logging_utils import eprint
@@ -99,7 +99,7 @@ def _run(uri, entry_point="main", version=None, parameters=None, experiment_id=N
         # If a docker_env attribute is defined in MLProject then it takes precedence over conda yaml
         # environments, so the project will be executed inside a docker container.
         if project.docker_env:
-            tracking.MlflowClient().set_tag(active_run.info.run_uuid, MLFLOW_DOCKER, "true")
+            tracking.MlflowClient().set_tag(active_run.info.run_uuid, MLFLOW_ENV, MLFLOW_DOCKER)
             _validate_docker_env(project.docker_env)
             _validate_docker_installation()
             image = _build_docker_image(work_dir=work_dir,
@@ -109,6 +109,7 @@ def _run(uri, entry_point="main", version=None, parameters=None, experiment_id=N
         # Synchronously create a conda environment (even though this may take some time)
         # to avoid failures due to multiple concurrent attempts to create the same conda env.
         elif use_conda:
+            tracking.MlflowClient().set_tag(active_run.info.run_uuid, MLFLOW_ENV, MLFLOW_CONDA)
             command_separator = " && "
             conda_env_name = _get_or_create_conda_env(project.conda_env_path)
             command += _get_conda_command(conda_env_name)
@@ -691,7 +692,7 @@ def _build_docker_image(work_dir, project, active_run):
                                     tag_name)
     tracking.MlflowClient().set_tag(active_run.info.run_uuid,
                                     MLFLOW_DOCKER_IMAGE_ID,
-                                    image[0].short_id)
+                                    image[0].id)
     return tag_name
 
 

diff --git a/mlflow/utils/mlflow_tags.py b/mlflow/utils/mlflow_tags.py
@@ -11,6 +11,8 @@
 MLFLOW_GIT_BRANCH_NAME = "mlflow.gitBranchName"
 MLFLOW_GIT_REPO_URL = "mlflow.gitRepoURL"
 MLFLOW_PARENT_RUN_ID = "mlflow.parentRunId"
-MLFLOW_DOCKER = "mlflow.docker"
+MLFLOW_ENV = "mlflow.project.env"
+MLFLOW_DOCKER = "docker"
+MLFLOW_CONDA = "conda"
 MLFLOW_DOCKER_IMAGE_NAME = "mlflow.docker.image.name"
 MLFLOW_DOCKER_IMAGE_ID = "mlflow.docker.image.id"
diff --git a/tests/projects/test_docker_projects.py b/tests/projects/test_docker_projects.py
@@ -51,18 +51,16 @@ def test_docker_project_execution(
     assert len(run.data.metrics) == len(expected_metrics)
     for metric in run.data.metrics:
         assert metric.value == expected_metrics[metric.key]
-    exact_expected_tags = {"mlflow.docker": "true"}
+    exact_expected_tags = {"mlflow.project.env": "docker"}
     approx_expected_tags = {
         "mlflow.docker.image.name": "mlflow-docker-example",
         "mlflow.docker.image.id": "sha256:",
-        "mlflow.gitRepoURL": "https://github.com/mlflow/mlflow",
     }
-    assert len(run.data.tags) == len(exact_expected_tags) + len(approx_expected_tags)
-    for tag in run.data.tags:
-        if tag.key in exact_expected_tags:
-            assert tag.value == exact_expected_tags[tag.key]
-        else:
-            assert tag.value.startswith(approx_expected_tags[tag.key])
+    run_tags = {tag.key: tag.value for tag in run.data.tags}
+    for k, v in exact_expected_tags.items():
+        assert run_tags[k] == v
+    for k, v in approx_expected_tags.items():
+        assert run_tags[k].startswith(v)
 
 
 @pytest.mark.parametrize("tracking_uri, expected_command_segment", [