Skip to content

Commit

Permalink
Update fetch_project to no longer remove mlruns from the working directory
Browse files Browse the repository at this point in the history
  • Loading branch information
smurching authored and aarondav committed Jul 30, 2018
1 parent fd36126 commit db79c58
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 90 deletions.
5 changes: 0 additions & 5 deletions mlflow/projects/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,11 +197,6 @@ def _fetch_project(uri, subdirectory, version, dst_dir, git_username, git_passwo
if uri != dst_dir:
dir_util.copy_tree(src=uri, dst=dst_dir)

# Make sure they don't have an outputs or mlruns directory (will need to change if we change
# how we log results locally)
shutil.rmtree(os.path.join(dst_dir, "outputs"), ignore_errors=True)
shutil.rmtree(os.path.join(dst_dir, "mlruns"), ignore_errors=True)

# Make sure there is a MLproject file in the specified working directory.
if not os.path.isfile(os.path.join(dst_dir, subdirectory, "MLproject")):
if subdirectory == '':
Expand Down
167 changes: 82 additions & 85 deletions tests/projects/test_projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
from mlflow.entities.run_status import RunStatus
from mlflow.projects import ExecutionException
from mlflow.store.file_store import FileStore
from mlflow.utils.file_utils import TempDir
from mlflow.utils import env

from tests.projects.utils import TEST_PROJECT_DIR, GIT_PROJECT_URI, TEST_DIR, validate_exit_status
from tests.projects.utils import tracking_uri_mock # pylint: disable=unused-import


def _assert_dirs_equal(expected, actual):
Expand Down Expand Up @@ -88,6 +88,18 @@ def test_fetch_project(tmpdir):
dst_dir=dst_dir, git_username=None, git_password=None)


def test_dont_remove_mlruns(tmpdir):
    """Fetching a local project must leave a pre-existing "mlruns" folder untouched."""
    project_src = tmpdir.mkdir("mlruns-src-dir")
    project_src.join("MLproject").write("dummy MLproject contents")
    project_src.mkdir("mlruns").join("some-file.txt").write("hi")
    work_dir = tmpdir.join("mlruns-work-dir").strpath
    mlflow.projects._fetch_project(
        uri=project_src.strpath, subdirectory="", version=None,
        dst_dir=work_dir, git_username=None, git_password=None)
    # The destination must be an exact copy of the source, "mlruns" included.
    _assert_dirs_equal(expected=project_src.strpath, actual=work_dir)


def test_parse_subdirectory():
# Make sure the parsing works as intended.
test_uri = "uri#subdirectory"
Expand All @@ -101,26 +113,68 @@ def test_parse_subdirectory():
mlflow.projects._parse_subdirectory(period_fail_uri)


def test_invalid_run_mode():
def test_invalid_run_mode(tracking_uri_mock): # pylint: disable=unused-argument
""" Verify that we raise an exception given an invalid run mode """
with TempDir() as tmp, mock.patch("mlflow.tracking.get_tracking_uri") as get_tracking_uri_mock:
get_tracking_uri_mock.return_value = tmp.path()
with pytest.raises(ExecutionException):
mlflow.projects.run(uri=TEST_PROJECT_DIR, mode="some unsupported mode")
with pytest.raises(ExecutionException):
mlflow.projects.run(uri=TEST_PROJECT_DIR, mode="some unsupported mode")


def test_use_conda():
def test_use_conda(tracking_uri_mock): # pylint: disable=unused-argument
""" Verify that we correctly handle the `use_conda` argument."""
with TempDir() as tmp, mock.patch("mlflow.tracking.get_tracking_uri") as get_tracking_uri_mock:
get_tracking_uri_mock.return_value = tmp.path()
# Verify we throw an exception when conda is unavailable
old_path = os.environ["PATH"]
env.unset_variable("PATH")
try:
with pytest.raises(ExecutionException):
mlflow.projects.run(TEST_PROJECT_DIR, use_conda=True)
finally:
os.environ["PATH"] = old_path
# Verify we throw an exception when conda is unavailable
old_path = os.environ["PATH"]
env.unset_variable("PATH")
try:
with pytest.raises(ExecutionException):
mlflow.projects.run(TEST_PROJECT_DIR, use_conda=True)
finally:
os.environ["PATH"] = old_path


@pytest.mark.skip(reason="flaky running in travis")
@pytest.mark.parametrize("use_start_run", map(str, [0, 1]))
def test_run(tmpdir, tracking_uri_mock, use_start_run):  # pylint: disable=unused-argument
    """Run a project synchronously and verify its status and logged data in the FileStore."""
    active_run = mlflow.projects.run(
        TEST_PROJECT_DIR, entry_point="test_tracking",
        parameters={"use_start_run": use_start_run},
        use_conda=False, experiment_id=0)
    # A blocking run must already be finished by the time run() returns.
    validate_exit_status(active_run.get_status(), RunStatus.FINISHED)
    # wait() on an already-finished synchronous run should be a no-op
    # that leaves the status FINISHED.
    active_run.wait()
    validate_exit_status(active_run.get_status(), RunStatus.FINISHED)
    # The FileStore rooted at the tracking dir should contain exactly this run.
    store = FileStore(tmpdir.strpath)
    infos = store.list_run_infos(experiment_id=0)
    assert len(infos) == 1
    assert active_run.run_id == infos[0].run_uuid
    run = store.get_run(active_run.run_id)
    assert run.info.status == RunStatus.FINISHED
    # Params logged by the entry point must match exactly what we passed in.
    expected_params = {"use_start_run": use_start_run}
    assert len(run.data.params) == len(expected_params)
    for logged_param in run.data.params:
        assert logged_param.value == expected_params[logged_param.key]
    # Metrics logged by the entry point.
    expected_metrics = {"some_key": 3}
    for logged_metric in run.data.metrics:
        assert logged_metric.value == expected_metrics[logged_metric.key]


@pytest.mark.skip(reason="flaky running in travis")
def test_run_async(tracking_uri_mock):  # pylint: disable=unused-argument
    """Non-blocking runs report RUNNING at launch and settle to a terminal status on wait()."""
    # A well-formed async run starts out RUNNING and finishes successfully.
    good_run = mlflow.projects.run(
        TEST_PROJECT_DIR, entry_point="sleep", parameters={"duration": 2},
        use_conda=False, experiment_id=0, block=False)
    validate_exit_status(good_run.get_status(), RunStatus.RUNNING)
    good_run.wait()
    validate_exit_status(good_run.get_status(), RunStatus.FINISHED)
    # A run launched with bad parameters should end up FAILED.
    bad_run = mlflow.projects.run(
        TEST_PROJECT_DIR, entry_point="sleep", parameters={"duration": -1, "invalid-param": 30},
        use_conda=False, experiment_id=0, block=False)
    bad_run.wait()
    validate_exit_status(bad_run.get_status(), RunStatus.FAILED)


@pytest.mark.parametrize(
Expand All @@ -134,71 +188,15 @@ def test_conda_path(mock_env, expected):


@pytest.mark.skip(reason="flaky running in travis")
def test_run():
    # NOTE(review): legacy pre-fixture variant of test_run — loops over both
    # `use_start_run` values inside one test body and points the tracking URI at a
    # temp dir via mock.patch rather than a pytest fixture.
    for use_start_run in map(str, [0, 1]):
        with TempDir() as tmp, mock.patch("mlflow.tracking.get_tracking_uri")\
                as get_tracking_uri_mock:
            tmp_dir = tmp.path()
            get_tracking_uri_mock.return_value = tmp_dir
            # Launch the project's "test_tracking" entry point as a blocking run.
            submitted_run = mlflow.projects.run(
                TEST_PROJECT_DIR, entry_point="test_tracking",
                parameters={"use_start_run": use_start_run},
                use_conda=False, experiment_id=0)
            # Blocking runs should be finished when they return
            validate_exit_status(submitted_run.get_status(), RunStatus.FINISHED)
            # Test that we can call wait() on a synchronous run & that the run has the correct
            # status after calling wait().
            submitted_run.wait()
            validate_exit_status(submitted_run.get_status(), RunStatus.FINISHED)
            # Validate run contents in the FileStore
            run_uuid = submitted_run.run_id
            store = FileStore(tmp_dir)
            run_infos = store.list_run_infos(experiment_id=0)
            assert len(run_infos) == 1
            store_run_uuid = run_infos[0].run_uuid
            assert run_uuid == store_run_uuid
            run = store.get_run(run_uuid)
            # Params logged by the entry point must match what we passed in.
            expected_params = {"use_start_run": use_start_run}
            assert run.info.status == RunStatus.FINISHED
            assert len(run.data.params) == len(expected_params)
            for param in run.data.params:
                assert param.value == expected_params[param.key]
            # Metrics logged by the entry point.
            expected_metrics = {"some_key": 3}
            for metric in run.data.metrics:
                assert metric.value == expected_metrics[metric.key]


@pytest.mark.skip(reason="flaky running in travis")
def test_run_async():
    # NOTE(review): legacy pre-fixture variant — mocks the tracking URI by hand
    # instead of using the tracking_uri_mock pytest fixture.
    with TempDir() as tmp, mock.patch("mlflow.tracking.get_tracking_uri") as get_tracking_uri_mock:
        tmp_dir = tmp.path()
        get_tracking_uri_mock.return_value = tmp_dir
        # A valid async run reports RUNNING at launch and FINISHED after wait().
        submitted_run0 = mlflow.projects.run(
            TEST_PROJECT_DIR, entry_point="sleep", parameters={"duration": 2},
            use_conda=False, experiment_id=0, block=False)
        validate_exit_status(submitted_run0.get_status(), RunStatus.RUNNING)
        submitted_run0.wait()
        validate_exit_status(submitted_run0.get_status(), RunStatus.FINISHED)
        # A run launched with invalid parameters should end up FAILED.
        submitted_run1 = mlflow.projects.run(
            TEST_PROJECT_DIR, entry_point="sleep", parameters={"duration": -1, "invalid-param": 30},
            use_conda=False, experiment_id=0, block=False)
        submitted_run1.wait()
        validate_exit_status(submitted_run1.get_status(), RunStatus.FAILED)


@pytest.mark.skip(reason="flaky running in travis")
def test_cancel_run():
    # NOTE(review): legacy pre-fixture variant — mocks the tracking URI by hand.
    with TempDir() as tmp, mock.patch("mlflow.tracking.get_tracking_uri") as get_tracking_uri_mock:
        tmp_dir = tmp.path()
        get_tracking_uri_mock.return_value = tmp_dir
        # Launch two identical non-blocking runs, then cancel only the first.
        submitted_run0, submitted_run1 = [mlflow.projects.run(
            TEST_PROJECT_DIR, entry_point="sleep", parameters={"duration": 2},
            use_conda=False, experiment_id=0, block=False) for _ in range(2)]
        submitted_run0.cancel()
        # A cancelled run reports FAILED.
        validate_exit_status(submitted_run0.get_status(), RunStatus.FAILED)
        # Sanity check: cancelling one run has no effect on the other
        submitted_run1.wait()
        validate_exit_status(submitted_run1.get_status(), RunStatus.FINISHED)
def test_cancel_run(tracking_uri_mock):  # pylint: disable=unused-argument
    """Cancelling one async run fails it without disturbing a concurrently-launched run."""
    run_to_cancel, other_run = [mlflow.projects.run(
        TEST_PROJECT_DIR, entry_point="sleep", parameters={"duration": 2},
        use_conda=False, experiment_id=0, block=False) for _ in range(2)]
    run_to_cancel.cancel()
    # A cancelled run reports FAILED.
    validate_exit_status(run_to_cancel.get_status(), RunStatus.FAILED)
    # Sanity check: cancelling one run has no effect on the other.
    other_run.wait()
    validate_exit_status(other_run.get_status(), RunStatus.FINISHED)


def test_get_dest_dir():
Expand All @@ -212,11 +210,10 @@ def test_get_dest_dir():
os.path.abspath(TEST_PROJECT_DIR)


def test_storage_dir():
def test_storage_dir(tmpdir):
"""
Test that we correctly handle the `storage_dir` argument, which specifies where to download
distributed artifacts passed to arguments of type `path`.
"""
with TempDir() as tmp_dir:
assert os.path.dirname(mlflow.projects._get_storage_dir(tmp_dir.path())) == tmp_dir.path()
assert os.path.dirname(mlflow.projects._get_storage_dir(tmpdir.strpath)) == tmpdir.strpath
assert os.path.dirname(mlflow.projects._get_storage_dir(None)) == tempfile.gettempdir()

0 comments on commit db79c58

Please sign in to comment.