Add DB migration support + migration logic for metric x coordinates (mlflow#1155)

Adds support for migrating SQL databases used with MLflow Tracking via a new `mlflow db upgrade [url]` CLI command, along with a database migration that adds metric x-coordinates. Makes a breaking change to the `mlflow server` and `mlflow ui` commands: users must first run the database migration via `mlflow db upgrade [db_url]` against their database URL (e.g. `mlflow db upgrade sqlite:///relative/path/to/db/file`) before starting the server.
smurching committed May 7, 2019
1 parent f9dd656 commit ef34858
Showing 27 changed files with 929 additions and 57 deletions.
23 changes: 23 additions & 0 deletions CONTRIBUTING.rst
@@ -158,6 +158,29 @@ Building Protobuf Files
To build protobuf files, simply run ``generate-protos.sh``. The required ``protoc`` version is ``3.6.0``.
Verify that ``.proto`` files and autogenerated code are in sync by running ``./test-generate-protos.sh``.


Database Schema Changes
-----------------------
MLflow's Tracking component supports storing experiment and run data in a SQL backend. To
make changes to the tracking database schema, run the following from your
checkout of MLflow:

.. code-block:: bash

  # Starting at the root of the project
  $ pwd
  ~/mlflow
  $ cd mlflow
  # MLflow relies on Alembic (https://alembic.sqlalchemy.org) for schema migrations.
  $ alembic revision -m "add new field to db"
  Generating ~/mlflow/mlflow/alembic/versions/12341123_add_new_field_to_db.py

These commands generate a new migration script (e.g. at
~/mlflow/mlflow/alembic/versions/12341123_add_new_field_to_db.py) that you should then edit to add
migration logic.


Launching the Development UI
----------------------------
We recommend `Running the Javascript Dev Server`_ - otherwise, the tracking frontend will request
11 changes: 11 additions & 0 deletions docs/source/tracking.rst
@@ -270,6 +270,17 @@ store* specified as ``./path_to_store`` or ``file:/path_to_store``, or a SQL con
for a *database-backed store*. For the latter, the argument must be a SQL connection string
specified as ``db_type://<user_name>:<password>@<host>:<port>/<database_name>``. Supported
database types are ``mysql``, ``mssql``, ``sqlite``, and ``postgresql``.
For backwards compatibility, ``--file-store`` is an alias for ``--backend-store-uri``.
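The connection-string format above can be illustrated with a small helper. This is a hypothetical sketch, not part of MLflow; note also that ``sqlite`` URIs deviate from this shape (they take the form ``sqlite:///path/to/file`` with no host or credentials):

```python
SUPPORTED_DB_TYPES = {"mysql", "mssql", "sqlite", "postgresql"}


def build_db_uri(db_type, user, password, host, port, database):
    """Assemble a db_type://<user_name>:<password>@<host>:<port>/<database_name> URI."""
    if db_type not in SUPPORTED_DB_TYPES:
        raise ValueError("Unsupported database type: %s" % db_type)
    return "{}://{}:{}@{}:{}/{}".format(db_type, user, password, host, port, database)


# e.g. build_db_uri("postgresql", "mlflow", "secret", "localhost", 5432, "tracking")
# -> "postgresql://mlflow:secret@localhost:5432/tracking"
```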

.. important::

``mlflow server`` will fail against a database-backed store with an out-of-date database schema.
To prevent this, upgrade your database schema to the latest supported version via
``mlflow db upgrade [db_uri]``. Note that schema migrations can result in database downtime, may
take longer on larger databases, and are not guaranteed to be transactional. As such, always
take a backup of your database prior to running ``mlflow db upgrade``; consult your database's
documentation for instructions on taking a backup.


By default ``--backend-store-uri`` is set to the local ``./mlruns`` directory (the same as when
running ``mlflow run`` locally), but when running a server, make sure that this points to a
2 changes: 1 addition & 1 deletion lint.sh
@@ -5,7 +5,7 @@ set -e
FWDIR="$(cd "`dirname $0`"; pwd)"
cd "$FWDIR"

pycodestyle --max-line-length=100 --exclude mlflow/protos,mlflow/server/js -- mlflow tests
pycodestyle --max-line-length=100 --exclude mlflow/protos,mlflow/server/js,mlflow/alembic -- mlflow tests
pylint --msg-template="{path} ({line},{column}): [{msg_id} {symbol}] {msg}" --rcfile="$FWDIR/pylintrc" -- mlflow tests

rstcheck README.rst
74 changes: 74 additions & 0 deletions mlflow/alembic.ini
@@ -0,0 +1,74 @@
# A generic, single database configuration.

[alembic]
# path to migration scripts
script_location = alembic

# template used to generate migration files
# file_template = %%(rev)s_%%(slug)s

# timezone to use when rendering the date
# within the migration file as well as the filename.
# string value is passed to dateutil.tz.gettz()
# leave blank for localtime
# timezone =

# max length of characters to apply to the
# "slug" field
# truncate_slug_length = 40

# set to 'true' to run the environment during
# the 'revision' command, regardless of autogenerate
# revision_environment = false

# set to 'true' to allow .pyc and .pyo files without
# a source .py file to be detected as revisions in the
# versions/ directory
# sourceless = false

# version location specification; this defaults
# to alembic/versions. When using multiple version
# directories, initial revisions must be specified with --version-path
# version_locations = %(here)s/bar %(here)s/bat alembic/versions

# the output encoding used when revision files
# are written from script.py.mako
# output_encoding = utf-8

sqlalchemy.url = ""


# Logging configuration
[loggers]
keys = root,sqlalchemy,alembic

[handlers]
keys = console

[formatters]
keys = generic

[logger_root]
level = WARN
handlers = console
qualname =

[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine

[logger_alembic]
level = INFO
handlers =
qualname = alembic

[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic

[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S
4 changes: 4 additions & 0 deletions mlflow/alembic/README
@@ -0,0 +1,4 @@
This directory contains configuration scripts and database migration logic for MLflow tracking
databases, using the Alembic migration library (https://alembic.sqlalchemy.org). To run database
migrations, use the `mlflow db upgrade` CLI command. To add and modify database migration logic,
see the contributor guide at https://github.com/mlflow/mlflow/blob/master/CONTRIBUTING.rst.
Empty file added mlflow/alembic/__init__.py
Empty file.
77 changes: 77 additions & 0 deletions mlflow/alembic/env.py
@@ -0,0 +1,77 @@

from logging.config import fileConfig

from sqlalchemy import engine_from_config
from sqlalchemy import pool

from alembic import context

# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config

# Interpret the config file for Python logging.
# This line sets up Python loggers.
fileConfig(config.config_file_name)

# add your model's MetaData object here
# for 'autogenerate' support
# from myapp import mymodel
# target_metadata = mymodel.Base.metadata
from mlflow.store.dbmodels.models import Base
target_metadata = Base.metadata

# other values from the config, defined by the needs of env.py,
# can be acquired:
# my_important_option = config.get_main_option("my_important_option")
# ... etc.


def run_migrations_offline():
"""Run migrations in 'offline' mode.
This configures the context with just a URL
and not an Engine, though an Engine is acceptable
here as well. By skipping the Engine creation
we don't even need a DBAPI to be available.
Calls to context.execute() here emit the given string to the
script output.
"""
url = config.get_main_option("sqlalchemy.url")
# Try https://stackoverflow.com/questions/30378233/sqlite-lack-of-alter-support-alembic-migration-failing-because-of-this-solutio
context.configure(
url=url, target_metadata=target_metadata, literal_binds=True, render_as_batch=True
)

with context.begin_transaction():
context.run_migrations()


def run_migrations_online():
"""Run migrations in 'online' mode.
In this scenario we need to create an Engine
and associate a connection with the context.
"""
connectable = engine_from_config(
config.get_section(config.config_ini_section),
prefix="sqlalchemy.",
poolclass=pool.NullPool,
)

with connectable.connect() as connection:
context.configure(
connection=connection, target_metadata=target_metadata, render_as_batch=True
)

with context.begin_transaction():
context.run_migrations()


if context.is_offline_mode():
run_migrations_offline()
else:
run_migrations_online()
24 changes: 24 additions & 0 deletions mlflow/alembic/script.py.mako
@@ -0,0 +1,24 @@
"""${message}

Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}

"""
from alembic import op
import sqlalchemy as sa
${imports if imports else ""}

# revision identifiers, used by Alembic.
revision = ${repr(up_revision)}
down_revision = ${repr(down_revision)}
branch_labels = ${repr(branch_labels)}
depends_on = ${repr(depends_on)}


def upgrade():
${upgrades if upgrades else "pass"}


def downgrade():
${downgrades if downgrades else "pass"}
35 changes: 35 additions & 0 deletions mlflow/alembic/versions/451aebb31d03_add_metric_step.py
@@ -0,0 +1,35 @@
"""add metric step
Revision ID: 451aebb31d03
Revises:
Create Date: 2019-04-22 15:29:24.921354
"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = '451aebb31d03'
down_revision = None
branch_labels = None
depends_on = None


def upgrade():
op.add_column('metrics', sa.Column('step', sa.BigInteger(), nullable=False, server_default='0'))
# Use batch mode so that we can run "ALTER TABLE" statements against SQLite
# databases (see more info at https://alembic.sqlalchemy.org/en/latest/
# batch.html#running-batch-migrations-for-sqlite-and-other-databases)
with op.batch_alter_table("metrics") as batch_op:
batch_op.drop_constraint(constraint_name='metric_pk', type_="primary")
batch_op.create_primary_key(
constraint_name='metric_pk',
columns=['key', 'timestamp', 'step', 'run_uuid', 'value'])


def downgrade():
# This migration cannot safely be downgraded; once metric data with the same
# (key, timestamp, run_uuid, value) are inserted (differing only in their `step`), we cannot
# revert to a schema where (key, timestamp, run_uuid, value) is the metric primary key.
pass
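Batch mode matters in the migration above because SQLite cannot ``ALTER TABLE`` to change a primary key; Alembic instead rebuilds the table behind the scenes. A standalone sketch of that copy-and-swap approach, using only the stdlib ``sqlite3`` module and a simplified ``metrics`` table (illustrative, not MLflow code):

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.executescript("""
    CREATE TABLE metrics (
        key TEXT, value REAL, timestamp INTEGER, run_uuid TEXT,
        PRIMARY KEY (key, timestamp, run_uuid, value)
    );
    INSERT INTO metrics VALUES ('loss', 0.5, 100, 'run1');
""")

# Rebuild the table with `step` added to the primary key, mimicking what
# batch mode does: create the new shape, copy rows, drop the old table, rename.
conn.executescript("""
    CREATE TABLE metrics_new (
        key TEXT, value REAL, timestamp INTEGER, run_uuid TEXT,
        step INTEGER NOT NULL DEFAULT 0,
        PRIMARY KEY (key, timestamp, step, run_uuid, value)
    );
    INSERT INTO metrics_new (key, value, timestamp, run_uuid)
        SELECT key, value, timestamp, run_uuid FROM metrics;
    DROP TABLE metrics;
    ALTER TABLE metrics_new RENAME TO metrics;
""")

# Existing rows survive the rebuild with the server default step of 0.
rows = conn.execute("SELECT key, value, timestamp, run_uuid, step FROM metrics").fetchall()
# rows == [('loss', 0.5, 100, 'run1', 0)]
```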
Empty file.
3 changes: 3 additions & 0 deletions mlflow/cli.py
@@ -16,6 +16,8 @@
import mlflow.rfunc.cli
import mlflow.sagemaker.cli
import mlflow.runs
import mlflow.store.db.utils
import mlflow.db

from mlflow.tracking.utils import _is_local_uri
from mlflow.utils.logging_utils import eprint
@@ -263,6 +265,7 @@ def server(backend_store_uri, default_artifact_root, host, port,
cli.add_command(mlflow.store.cli.commands)
cli.add_command(mlflow.azureml.cli.commands)
cli.add_command(mlflow.runs.commands)
cli.add_command(mlflow.db.commands)

if __name__ == '__main__':
cli()
22 changes: 22 additions & 0 deletions mlflow/db.py
@@ -0,0 +1,22 @@
import click

import mlflow.store.db.utils


@click.group("db")
def commands():
"""
Commands for managing an MLflow tracking database.
"""
pass


@commands.command()
@click.argument("url")
def upgrade(url):
"""
Upgrade the schema of an MLflow tracking database to the latest supported version.
version. Note that schema migrations can be slow and are not guaranteed to be transactional -
always take a backup of your database before running migrations.
"""
mlflow.store.db.utils._upgrade_db(url)
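The ``db`` group above hangs off the top-level CLI via click's group/command decorators. For readers unfamiliar with that pattern, here is an equivalent stdlib-only sketch of the same sub-command shape using ``argparse`` (hypothetical, not the actual MLflow CLI):

```python
import argparse


def make_parser():
    # Top-level "mlflow" parser with a "db" group and a nested "upgrade" command.
    parser = argparse.ArgumentParser(prog="mlflow")
    sub = parser.add_subparsers(dest="group")
    db = sub.add_parser("db", help="Commands for managing an MLflow tracking database.")
    db_sub = db.add_subparsers(dest="command")
    upgrade = db_sub.add_parser("upgrade", help="Upgrade the database schema.")
    upgrade.add_argument("url")
    return parser


args = make_parser().parse_args(["db", "upgrade", "sqlite:///mlruns.db"])
# args.group == "db", args.command == "upgrade", args.url == "sqlite:///mlruns.db"
```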
Empty file added mlflow/store/db/__init__.py
Empty file.
35 changes: 35 additions & 0 deletions mlflow/store/db/utils.py
@@ -0,0 +1,35 @@
import os

import logging


_logger = logging.getLogger(__name__)


def _get_alembic_config(db_url):
from alembic.config import Config
current_dir = os.path.dirname(os.path.abspath(__file__))
package_dir = os.path.normpath(os.path.join(current_dir, os.pardir, os.pardir))
directory = os.path.join(package_dir, 'alembic')
config = Config(os.path.join(package_dir, 'alembic.ini'))
config.set_main_option('script_location', directory)
config.set_main_option('sqlalchemy.url', db_url)
return config


def _upgrade_db(url):
"""
Upgrade the schema of an MLflow tracking database to the latest supported version.
version. Note that schema migrations can be slow and are not guaranteed to be transactional -
we recommend taking a backup of your database before running migrations.
:param url Database URL, like sqlite:///<absolute-path-to-local-db-file>. See
https://docs.sqlalchemy.org/en/13/core/engines.html#database-urls for a full list of valid
database URLs.
"""
# alembic adds significant import time, so we import it lazily
from alembic import command

_logger.info("Updating database tables at %s", url)
config = _get_alembic_config(url)
command.upgrade(config, 'heads')
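``_get_alembic_config`` walks two directory levels up from ``mlflow/store/db`` to locate ``alembic.ini`` and the ``alembic`` script directory at the package root. That path arithmetic can be checked in isolation; the paths below are illustrative stand-ins for a real install location:

```python
import os

# Stand-in for os.path.dirname(os.path.abspath(__file__)) inside mlflow/store/db.
current_dir = os.path.join("site-packages", "mlflow", "store", "db")

# Two os.pardir hops resolve to the mlflow package root.
package_dir = os.path.normpath(os.path.join(current_dir, os.pardir, os.pardir))
ini_path = os.path.join(package_dir, "alembic.ini")
script_dir = os.path.join(package_dir, "alembic")
# package_dir is "site-packages/mlflow" (POSIX separators shown)
```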