Skip to content

Commit

Permalink
minor fixes as follow on for docker based MLprojects (mlflow#819)
Browse files Browse the repository at this point in the history
* minor fixes as follow on for docker based MLprojects
* review comments; fixing unit test
  • Loading branch information
mparkhe committed Jan 18, 2019
1 parent d7d6d5d commit 4951259
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 15 deletions.
7 changes: 4 additions & 3 deletions docs/source/projects.rst
Original file line number Diff line number Diff line change
Expand Up @@ -229,13 +229,14 @@ where ``<uri>`` is a Git repository URI or a folder. You can pass Git credential
Execution on Docker containers
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
You can run projects inside a Docker container instead of conda environments. In order to do that
you need to specify the ``docker_env`` and ``dockerimage`` atributes in MLProject as described bellow.
It simply mounts the local directory of the project as a volume inside container in ``/mlflow/projects/code`` path.
you need to specify the ``docker_env`` attribute, along with its ``image`` attribute, in the MLProject
file as described below. The project's local directory is simply mounted as a volume inside the
container at the ``/mlflow/projects/code`` path.

.. code::
docker_env:
dockerimage: mlflow-run-image
image: mlflow-run-image
Iterating Quickly
-----------------
Expand Down
7 changes: 4 additions & 3 deletions mlflow/projects/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
import mlflow.projects.databricks
from mlflow.utils import process
from mlflow.utils.mlflow_tags import MLFLOW_GIT_REPO_URL, MLFLOW_GIT_BRANCH_NAME
from mlflow.utils.mlflow_tags import MLFLOW_DOCKER
from mlflow.utils.mlflow_tags import MLFLOW_ENV, MLFLOW_CONDA, MLFLOW_DOCKER
from mlflow.utils.mlflow_tags import MLFLOW_DOCKER_IMAGE_NAME, MLFLOW_DOCKER_IMAGE_ID
from mlflow.utils import databricks_utils, file_utils
from mlflow.utils.logging_utils import eprint
Expand Down Expand Up @@ -99,7 +99,7 @@ def _run(uri, entry_point="main", version=None, parameters=None, experiment_id=N
# If a docker_env attribute is defined in MLProject then it takes precedence over conda yaml
# environments, so the project will be executed inside a docker container.
if project.docker_env:
tracking.MlflowClient().set_tag(active_run.info.run_uuid, MLFLOW_DOCKER, "true")
tracking.MlflowClient().set_tag(active_run.info.run_uuid, MLFLOW_ENV, MLFLOW_DOCKER)
_validate_docker_env(project.docker_env)
_validate_docker_installation()
image = _build_docker_image(work_dir=work_dir,
Expand All @@ -109,6 +109,7 @@ def _run(uri, entry_point="main", version=None, parameters=None, experiment_id=N
# Synchronously create a conda environment (even though this may take some time)
# to avoid failures due to multiple concurrent attempts to create the same conda env.
elif use_conda:
tracking.MlflowClient().set_tag(active_run.info.run_uuid, MLFLOW_ENV, MLFLOW_CONDA)
command_separator = " && "
conda_env_name = _get_or_create_conda_env(project.conda_env_path)
command += _get_conda_command(conda_env_name)
Expand Down Expand Up @@ -691,7 +692,7 @@ def _build_docker_image(work_dir, project, active_run):
tag_name)
tracking.MlflowClient().set_tag(active_run.info.run_uuid,
MLFLOW_DOCKER_IMAGE_ID,
image[0].short_id)
image[0].id)
return tag_name


Expand Down
4 changes: 3 additions & 1 deletion mlflow/utils/mlflow_tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
MLFLOW_GIT_BRANCH_NAME = "mlflow.gitBranchName"
MLFLOW_GIT_REPO_URL = "mlflow.gitRepoURL"
MLFLOW_PARENT_RUN_ID = "mlflow.parentRunId"
MLFLOW_DOCKER = "mlflow.docker"
MLFLOW_ENV = "mlflow.project.env"
MLFLOW_DOCKER = "docker"
MLFLOW_CONDA = "conda"
MLFLOW_DOCKER_IMAGE_NAME = "mlflow.docker.image.name"
MLFLOW_DOCKER_IMAGE_ID = "mlflow.docker.image.id"
14 changes: 6 additions & 8 deletions tests/projects/test_docker_projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,18 +51,16 @@ def test_docker_project_execution(
assert len(run.data.metrics) == len(expected_metrics)
for metric in run.data.metrics:
assert metric.value == expected_metrics[metric.key]
exact_expected_tags = {"mlflow.docker": "true"}
exact_expected_tags = {"mlflow.project.env": "docker"}
approx_expected_tags = {
"mlflow.docker.image.name": "mlflow-docker-example",
"mlflow.docker.image.id": "sha256:",
"mlflow.gitRepoURL": "https://github.com/mlflow/mlflow",
}
assert len(run.data.tags) == len(exact_expected_tags) + len(approx_expected_tags)
for tag in run.data.tags:
if tag.key in exact_expected_tags:
assert tag.value == exact_expected_tags[tag.key]
else:
assert tag.value.startswith(approx_expected_tags[tag.key])
run_tags = {tag.key: tag.value for tag in run.data.tags}
for k, v in exact_expected_tags.items():
assert run_tags[k] == v
for k, v in approx_expected_tags.items():
assert run_tags[k].startswith(v)


@pytest.mark.parametrize("tracking_uri, expected_command_segment", [
Expand Down

0 comments on commit 4951259

Please sign in to comment.