diff --git a/doc/cephadm/administration.rst b/doc/cephadm/administration.rst deleted file mode 100644 index afc1dd21cfec0..0000000000000 --- a/doc/cephadm/administration.rst +++ /dev/null @@ -1,265 +0,0 @@ -.. _cephadm-administration: - -====================== -cephadm Administration -====================== - - -Configuration -============= - -The cephadm orchestrator can be configured to use an SSH configuration file. This is -useful for specifying private keys and other SSH connection options. - -:: - - # ceph config set mgr mgr/cephadm/ssh_config_file /path/to/config - -An SSH configuration file can be provided without requiring an accessible file -system path as the method above does. - -:: - - # ceph cephadm set-ssh-config -i /path/to/config - -To clear this value use the command: - -:: - - # ceph cephadm clear-ssh-config - -Health checks -============= - -CEPHADM_PAUSED --------------- - -Cephadm background work has been paused with ``ceph orch pause``. Cephadm -will continue to perform passive monitoring activities (like checking -host and daemon status), but it will not make any changes (like deploying -or removing daemons). - -You can resume cephadm work with:: - - ceph orch resume - -CEPHADM_STRAY_HOST ------------------- - -One or more hosts have running Ceph daemons but are not registered as -hosts managed by *cephadm*. This means that those services cannot -currently be managed by cephadm (e.g., restarted, upgraded, included -in `ceph orch ps`). - -You can manage the host(s) with:: - - ceph orch host add ** - -Note that you may need to configure SSH access to the remote host -before this will work. - -Alternatively, you can manually connect to the host and ensure that -services on that host are removed and/or migrated to a host that is -managed by *cephadm*. - -You can also disable this warning entirely with:: - - ceph config set mgr mgr/cephadm/warn_on_stray_hosts false - -CEPHADM_STRAY_DAEMON --------------------- - -One or more Ceph daemons are running but not are not managed by -*cephadm*, perhaps because they were deploy using a different tool, or -were started manually. This means that those services cannot -currently be managed by cephadm (e.g., restarted, upgraded, included -in `ceph orch ps`). - -**FIXME:** We need to implement and document an adopt procedure here. - -You can also disable this warning entirely with:: - - ceph config set mgr mgr/cephadm/warn_on_stray_daemons false - -CEPHADM_HOST_CHECK_FAILED -------------------------- - -One or more hosts have failed the basic cephadm host check, which verifies -that (1) the host is reachable and cephadm can be executed there, and (2) -that the host satisfies basic prerequisites, like a working container -runtime (podman or docker) and working time synchronization. -If this test fails, cephadm will no be able to manage services on that host. - -You can manually run this check with:: - - ceph cephadm check-host ** - -You can remove a broken host from management with:: - - ceph orch host rm ** - -You can disable this health warning with:: - - ceph config set mgr mgr/cephadm/warn_on_failed_host_check false - - -Converting an existing cluster to cephadm -========================================= - -Cephadm allows you to (pretty) easily convert an existing Ceph cluster that -has been deployed with ceph-deploy, ceph-ansible, DeepSea, or similar tools. - -Limitations ------------ - -* Cephadm only works with BlueStore OSDs. If there are FileStore OSDs - in your cluster you cannot manage them. 
- -Adoption Process ----------------- - -#. Get the ``cephadm`` command line too on each host. You can do this with curl or by installing the package. The simplest approach is:: - - [each host] # curl --silent --remote-name --location https://github.com/ceph/ceph/raw/master/src/cephadm/cephadm - [each host] # chmod +x cephadm - -#. Prepare each host for use by ``cephadm``:: - - [each host] # ./cephadm prepare-host - -#. List all Ceph daemons on the current host:: - - # ./cephadm ls - - You should see that all existing daemons have a type of ``legacy`` - in the resulting output. - -#. Determine which Ceph version you will use. You can use any Octopus - release or later. For example, ``docker.io/ceph/ceph:v15.2.0``. The default - will be the latest stable release, but if you are upgrading from an earlier - release at the same time be sure to refer to the upgrade notes for any - special steps to take while upgrading. - - The image is passed to cephadm with:: - - # ./cephadm --image $IMAGE - -#. Adopt each monitor:: - - # ./cephadm adopt --style legacy --name mon. - -#. Adopt each manager:: - - # ./cephadm adopt --style legacy --name mgr. - -#. Enable cephadm:: - - # ceph mgr module enable cephadm - # ceph orch set backend cephadm - -#. Generate an SSH key:: - - # ceph cephadm generate-key - # ceph cephadm get-pub-key - -#. Install the SSH key on each host to be managed:: - - # echo | sudo tee /root/.ssh/authorized_keys - - Note that ``/root/.ssh/authorized_keys`` should have mode ``0600`` and - ``/root/.ssh`` should have mode ``0700``. - -#. Tell cephadm which hosts to manage:: - - # ceph orch host add [ip-address] - - This will perform a ``cephadm check-host`` on each host before - adding it to ensure it is working. The IP address argument is only - required if DNS doesn't allow you to connect to each host by it's - short name. - -#. Verify that the monitor and manager daemons are visible:: - - # ceph orch ps - -#. Adopt all remainingg daemons:: - - # ./cephadm adopt --style legacy --name - # ./cephadm adopt --style legacy --name - # ./cephadm adopt --style legacy --name - - Repeat for each host and daemon. - -#. Check the ``ceph health detail`` output for cephadm warnings about - stray cluster daemons or hosts that are not yet managed. - -Troubleshooting -=============== - -Sometimes there is a need to investigate why a cephadm command failed or why -a specific service no longer runs properly. - -As cephadm deploys daemons as containers, troubleshooting daemons is slightly -different. Here are a few tools and commands to help investigating issues. - -Gathering log files -------------------- - -Use journalctl to gather the log files of all daemons: - -.. note:: By default cephadm now stores logs in journald. This means - that you will no longer find daemon logs in ``/var/log/ceph/``. - -To read the log file of one specific daemon, run:: - - cephadm logs --name - -Note: this only works when run on the same host where the daemon is running. To -get logs of a daemon running on a different host, give the ``--fsid`` option:: - - cephadm logs --fsid --name - -Where the ```` corresponds to the cluster id printed by ``ceph status``. 
- -To fetch all log files of all daemons on a given host, run:: - - for name in $(cephadm ls | jq -r '.[].name') ; do - cephadm logs --fsid --name "$name" > $name; - done - -Collecting systemd status -------------------------- - -To print the state of a systemd unit, run:: - - systemctl status "ceph-$(cephadm shell ceph fsid)@.service"; - - -To fetch all state of all daemons of a given host, run:: - - fsid="$(cephadm shell ceph fsid)" - for name in $(cephadm ls | jq -r '.[].name') ; do - systemctl status "ceph-$fsid@$name.service" > $name; - done - - -List all downloaded container images ------------------------------------- - -To list all container images that are downloaded on a host: - -.. note:: ``Image`` might also be called `ImageID` - -:: - - podman ps -a --format json | jq '.[].Image' - "docker.io/library/centos:8" - "registry.opensuse.org/opensuse/leap:15.2" - - -Manually running containers ---------------------------- - -cephadm writes small wrappers that run a containers. Refer to -``/var/lib/ceph///unit.run`` for the container execution command. -to execute a container. diff --git a/doc/cephadm/adoption.rst b/doc/cephadm/adoption.rst new file mode 100644 index 0000000000000..f94e46325fde9 --- /dev/null +++ b/doc/cephadm/adoption.rst @@ -0,0 +1,142 @@ +.. _cephadm-adoption: + +Converting an existing cluster to cephadm +========================================= + +Cephadm allows you to convert an existing Ceph cluster that +has been deployed with ceph-deploy, ceph-ansible, DeepSea, or similar tools. + +Limitations +----------- + +* Cephadm only works with BlueStore OSDs. If there are FileStore OSDs + in your cluster you cannot manage them. + +Preparation +----------- + +#. Get the ``cephadm`` command line tool on each host in the existing + cluster. See :ref:`get-cephadm`. + +#. Prepare each host for use by ``cephadm``:: + + # cephadm prepare-host + +#. Determine which Ceph version you will use. You can use any Octopus (15.2.z) + release or later. For example, ``docker.io/ceph/ceph:v15.2.0``. The default + will be the latest stable release, but if you are upgrading from an earlier + release at the same time be sure to refer to the upgrade notes for any + special steps to take while upgrading. + + The image is passed to cephadm with:: + + # cephadm --image $IMAGE + +#. Cephadm can provide a list of all Ceph daemons on the current host:: + + # cephadm ls + + Before starting, you should see that all existing daemons have a + style of ``legacy`` in the resulting output. As the adoption + process progresses, adopted daemons will appear as style + ``cephadm:v1``. + + +Adoption process +---------------- + +#. Ensure the ceph configuration is migrated to use the cluster config database. + If the ``/etc/ceph/ceph.conf`` is identical on each host, then on one host:: + + # ceph config assimilate-conf -i /etc/ceph/ceph.conf + + If there are config variations on each host, you may need to repeat + this command on each host. You can view the cluster's + configuration to confirm that it is complete with:: + + # ceph config dump + +#. Adopt each monitor:: + + # cephadm adopt --style legacy --name mon. + + Each legacy monitor should stop, quickly restart as a cephadm + container, and rejoin the quorum. + +#. Adopt each manager:: + + # cephadm adopt --style legacy --name mgr. + +#. Enable cephadm:: + + # ceph mgr module enable cephadm + # ceph orch set backend cephadm + +#. Generate an SSH key:: + + # ceph cephadm generate-key + # ceph cephadm get-pub-key > ceph.pub + +#. 
Install the cluster SSH key on each host in the cluster:: + + # ssh-copy-id -f -i ceph.pub root@ + +#. Tell cephadm which hosts to manage:: + + # ceph orch host add [ip-address] + + This will perform a ``cephadm check-host`` on each host before + adding it to ensure it is working. The IP address argument is only + required if DNS does not allow you to connect to each host by its + short name. + +#. Verify that the adopted monitor and manager daemons are visible:: + + # ceph orch ps + +#. Adopt all OSDs in the cluster:: + + # cephadm adopt --style legacy --name + + For example:: + + # cephadm adopt --style legacy --name osd.1 + # cephadm adopt --style legacy --name osd.2 + +#. Redeploy MDS daemons by telling cephadm how many daemons to run for + each file system. You can list file systems by name with ``ceph fs + ls``. For each file system:: + + # ceph orch apply mds + + For example, in a cluster with a single file system called `foo`:: + + # ceph fs ls + name: foo, metadata pool: foo_metadata, data pools: [foo_data ] + # ceph orch apply mds foo 2 + + Wait for the new MDS daemons to start with:: + + # ceph orch ps --daemon-type mds + + Finally, stop and remove the legacy MDS daemons:: + + # systemctl stop ceph-mds.target + # rm -rf /var/lib/ceph/mds/ceph-* + +#. Redeploy RGW daemons. Cephadm manages RGW daemons by zone. For each + zone, deploy new RGW daemons with cephadm:: + + # ceph orch apply rgw [--port ] [--ssl] + + where ** can be a simple daemon count, or a list of + specific hosts (see :ref:`orchestrator-cli-placement-spec`). + + Once the daemons have started and you have confirmed they are functioning, + stop and remove the old legacy daemons:: + + # systemctl stop ceph-rgw.target + # rm -rf /var/lib/ceph/radosgw/ceph-* + +#. Check the ``ceph health detail`` output for cephadm warnings about + stray cluster daemons or hosts that are not yet managed. diff --git a/doc/cephadm/drivegroups.rst b/doc/cephadm/drivegroups.rst index b0032bfa3dcbc..8497559f486eb 100644 --- a/doc/cephadm/drivegroups.rst +++ b/doc/cephadm/drivegroups.rst @@ -1,3 +1,5 @@ +.. _drivegroups: + =========== DriveGroups =========== diff --git a/doc/cephadm/index.rst b/doc/cephadm/index.rst index ddce3b3302564..4a0a9230fbd78 100644 --- a/doc/cephadm/index.rst +++ b/doc/cephadm/index.rst @@ -1,297 +1,32 @@ -.. _cephadm-bootstrap: +.. _cephadm: -======================== - Installation (cephadm) -======================== +======= +Cephadm +======= -.. note:: The *cephadm* bootstrap feature is first introduced in Octopus, and is not yet recommended for production deployments. +Cephadm deploys and manages a Ceph cluster by connection to hosts from the +manager daemon via SSH to add, remove, or update Ceph daemon containers. It +does not rely on external configuration or orchestration tools like Ansible, +Rook, or Salt. -cephadm manages nodes in a cluster by establishing an SSH connection -and issues explicit management commands. It does not rely on -separate systems such as Rook or Ansible. +Cephadm starts by bootstrapping a tiny Ceph cluster on a single node +(one monitor and one manager) and then uses the orchestration +interface ("day 2" commands) to expand the cluster to include all +hosts and to provision all Ceph daemons and services. This can be +performed via the Ceph command-line interface (CLI) or dashboard +(GUI). -A new Ceph cluster is deployed by bootstrapping a cluster on a single -node, and then adding additional nodes and daemons via the CLI or GUI -dashboard. 
- -The following example installs a basic three-node cluster. Each -node will be identified by its prompt. For example, "[monitor 1]" -identifies the first monitor, "[monitor 2]" identifies the second -monitor, and "[monitor 3]" identifies the third monitor. This -information is provided in order to make clear which commands -should be issued on which systems. - -"[any node]" identifies any Ceph node, and in the context -of this installation guide means that the associated command -can be run on any node. - -Requirements -============ - -- Podman or Docker -- LVM2 - -.. highlight:: console - -Get cephadm -=========== - -The ``cephadm`` utility is used to bootstrap a new Ceph Cluster. - -Use curl to fetch the standalone script:: - - [monitor 1] # curl --silent --remote-name --location https://github.com/ceph/ceph/raw/master/src/cephadm/cephadm - [monitor 1] # chmod +x cephadm - -You can also get the utility by installing a package provided by -your Linux distribution:: - - [monitor 1] # apt install -y cephadm # or - [monitor 1] # dnf install -y cephadm # or - [monitor 1] # yum install -y cephadm # or - [monitor 1] # zypper install -y cephadm - - -Bootstrap a new cluster -======================= - -To create a new cluster, you need to know which *IP address* to use -for the cluster's first monitor. This is normally just the IP for the -first cluster node. If there are multiple networks and interfaces, be -sure to choose one that will be accessible by any hosts accessing the -Ceph cluster. - -To bootstrap the cluster run the following command:: - - [node 1] $ sudo ./cephadm bootstrap --mon-ip ** - -This command does a few things: - -* Creates a monitor and manager daemon for the new cluster on the - local host. A minimal configuration file needed to communicate with - the new cluster is written to ``ceph.conf`` in the local directory. -* A copy of the ``client.admin`` administrative (privileged!) secret - key is written to ``ceph.client.admin.keyring`` in the local directory. -* Generates a new SSH key, and adds the public key to the local root user's - ``/root/.ssh/authorized_keys`` file. A copy of the public key is written - to ``ceph.pub`` in the current directory. - -Interacting with the cluster -============================ - -To interact with your cluster, start up a container that has all of -the Ceph packages installed:: - - [any node] $ sudo ./cephadm shell --config ceph.conf --keyring ceph.client.admin.keyring - -The ``--config`` and ``--keyring`` arguments will bind those local -files to the default locations in ``/etc/ceph`` inside the container -to allow the ``ceph`` CLI utility to work without additional -arguments. 
Inside the container, you can check the cluster status with:: - - [ceph: root@monitor_1_hostname /]# ceph status - -In order to interact with the Ceph cluster outside of a container -(that is, from the command line), install the Ceph -client packages and install the configuration and privileged -administrator key in a global location:: - - [any node] $ sudo apt install -y ceph-common # or, - [any node] $ sudo dnf install -y ceph-common # or, - [any node] $ sudo yum install -y ceph-common - - [any node] $ sudo install -m 0644 ceph.conf /etc/ceph/ceph.conf - [any node] $ sudo install -m 0600 ceph.keyring /etc/ceph/ceph.keyring - -Watching cephadm log messages -============================= - -Cephadm logs to the ``cephadm`` cluster log channel, which means you can monitor progress in realtime with:: - - # ceph -W cephadm - -By default it will show info-level events and above. To see -debug-level messages too:: - - # ceph config set mgr mgr/cephadm/log_to_cluster_level debug - # ceph -W cephadm --watch-debug - -Be careful: the debug messages are very verbose! - -You can see recent events with:: - - # ceph log last cephadm - -These events are also logged to the ``ceph.cephadm.log`` file on -monitor hosts and/or to the monitor-daemon stderr. - -Adding hosts to the cluster -=========================== - -For each new host you'd like to add to the cluster, you need to do two things: - -#. Install the cluster's public SSH key in the new host's root user's - ``authorized_keys`` file. This is easy with the ``ssh-copy-id`` script:: - - [monitor 1] # ssh-copy-id -f -i ceph.pub root@*newhost* - -#. Tell Ceph that the new node is part of the cluster:: - - # ceph orch host add *newhost* - -Deploying additional monitors -============================= - -Normally a Ceph cluster has three or five monitor daemons spread -across different hosts. As a rule of thumb, you should deploy five -monitors if there are five or more nodes in your cluster. - -.. _CIDR: https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing#CIDR_notation - -If all of your monitors will exist on the same IP subnet, cephadm can -automatically scale the number of monitors. This subnet should be -specified in `CIDR`_ format (e.g., ``10.1.2.0/24``). (If you do not -specify a subnet, you will need to manually specify an IP or subnet -when creating each monitor.):: - - # ceph config set mon public_network ** - -For example:: - - # ceph config set mon public_network 10.1.2.0/24 - -There are several ways to add additional monitors: - -* You can simply tell cephadm how many monitors you want, and it will pick the - hosts (randomly):: - - # ceph orch apply mon ** - - For example, if you have 5 or more hosts added to the cluster,:: - - # ceph orch apply mon 5 - -* You can explicitly specify which hosts to deploy on. 
Be sure to include - the first monitor host in this list.:: - - # ceph orch apply mon ** - - For example,:: - - # ceph orch apply mon host1,host2,host3 - -* You can control which hosts the monitors run on by adding the ``mon`` label - to the appropriate hosts:: - - # ceph orch host label add ** mon - - To view the current hosts and labels,:: - - # ceph orch host ls - - For example:: - - # ceph orch host label add host1 mon - # ceph orch host label add host2 mon - # ceph orch host label add host3 mon - # ceph orch host ls - HOST ADDR LABELS STATUS - host1 mon - host2 mon - host3 mon - host4 - host5 - - Then tell cephadm to deploy monitors based on the label:: - - # ceph orch apply mon label:mon - -* You can explicitly specify the IP address or CIDR for each monitor - and control where it is placed. This is the only supported method - if you did not specify the CIDR monitor network above. - - To deploy additional monitors,:: - - # ceph orch daemon add mon * [...]* - - For example, to deploy a second monitor on ``newhost1`` using an IP - address ``10.1.2.123`` and a third monitor on ``newhost2`` in - network ``10.1.2.0/24``,:: - - # ceph orch daemon add mon newhost1:10.1.2.123 - # ceph orch daemon add mon newhost2:10.1.2.0/24 - -Deploying OSDs -============== - -To add OSDs to the cluster, you have two options: - -#. You need to know the device name for the block device (hard disk or -SSD) that will be used. Then,:: - - # ceph orch osd create **:** - - For example, to deploy an OSD on host *newhost*'s SSD,:: - - # ceph orch osd create newhost:/dev/disk/by-id/ata-WDC_WDS200T2B0A-00SM50_182294800028 - - -#. You need to describe your disk setup by it's properties (Drive Groups) - - Link to DriveGroup docs.:: - - # ceph orch osd create -i my_drivegroups.yml - - -.. _drivegroups: drivegroups:: - -Deploying manager daemons -========================= - -It is a good idea to have at least one backup manager daemon. To -deploy one or more new manager daemons,:: - - # ceph orch apply mgr ** [** ...] - -Deploying MDSs -============== - -One or more MDS daemons is required to use the CephFS file system. -These are created automatically if the newer ``ceph fs volume`` -interface is used to create a new file system. For more information, -see :ref:`fs-volumes-and-subvolumes`. - -To deploy metadata servers,:: - - # ceph orch apply mds ** ** [** ...] - -Deploying RGWs -============== - -Cephadm deploys radosgw as a collection of daemons that manage a -particular *realm* and *zone*. (For more information about realms and -zones, see :ref:`multisite`.) To deploy a set of radosgw daemons for -a particular realm and zone,:: - - # ceph orch apply rgw ** ** ** [** ...] - -Note that with cephadm, radosgw daemons are configured via the monitor -configuration database instead of via a `ceph.conf` or the command line. If -that confiruation isn't already in place (usually in the -``client.rgw..`` section), then the radosgw -daemons will start up with default settings (e.g., binding to port -80). - - -Further Reading -=============== +Cephadm is new in the Octopus v15.2.0 release and does not support older +versions of Ceph. .. 
toctree:: :maxdepth: 2 - Cephadm administration + install + adoption + upgrade + Cephadm operations Cephadm monitoring Cephadm CLI <../mgr/orchestrator> DriveGroups - OS recommendations <../start/os-recommendations> - + troubleshooting diff --git a/doc/cephadm/install.rst b/doc/cephadm/install.rst new file mode 100644 index 0000000000000..42c135e55560e --- /dev/null +++ b/doc/cephadm/install.rst @@ -0,0 +1,322 @@ +============================ +Deploying a new Ceph cluster +============================ + +Cephadm creates a new Ceph cluster by "bootstrapping" on a single +host, expanding the cluster to encompass any additional hosts, and +then deploying the needed services. + +.. highlight:: console + +Requirements +============ + +- Systemd +- Podman or Docker for running containers +- Time synchronization (such as chrony or NTP) +- LVM2 for provisioning storage devices + +Any modern Linux distribution should be sufficient. Dependencies +are installed automatically by the bootstrap process below. + +.. _get-cephadm: + +Install cephadm +=============== + +The ``cephadm`` command can (1) bootstrap a new cluster, (2) +launch a containerized shell with a working Ceph CLI, and (3) aid in +debugging containerized Ceph daemons. + +There are a few ways to install cephadm: + +* Use ``curl`` to fetch the most recent version of the + standalone script:: + + # curl --silent --remote-name --location https://github.com/ceph/ceph/raw/octopus/src/cephadm/cephadm + # chmod +x cephadm + + This script can be run directly from the current directory with:: + + # ./cephadm + +* Although the standalone script is sufficient to get a cluster started, it is + convenient to have the ``cephadm`` command installed on the host. To install + these packages for the current Octopus release:: + + # ./cephadm add-repo --release octopus + # ./cephadm install + + Confirm that ``cephadm`` is now in your PATH with:: + + # which cephadm + +* Some commercial Linux distributions (e.g., RHEL, SLE) may already + include up-to-date Ceph packages. In that case, you can install + cephadm directly. For example:: + + # dnf install -y cephadm # or + # zypper install -y cephadm + + + +Bootstrap a new cluster +======================= + +You need to know which *IP address* to use for the cluster's first +monitor daemon. This is normally just the IP for the first host. If there +are multiple networks and interfaces, be sure to choose one that will +be accessible by any host accessing the Ceph cluster. + +To bootstrap the cluster:: + + # mkdir -p /etc/ceph + # cephadm bootstrap --mon-ip ** + +This command will: + +* Create a monitor and manager daemon for the new cluster on the local + host. +* Generate a new SSH key for the Ceph cluster and adds it to the root + user's ``/root/.ssh/authorized_keys`` file. +* Write a minimal configuration file needed to communicate with the + new cluster to ``/etc/ceph/ceph.conf``. +* Write a copy of the ``client.admin`` administrative (privileged!) + secret key to ``/etc/ceph/ceph.client.admin.keyring``. +* Write a copy of the public key to + ``/etc/ceph/ceph.pub``. + +The default bootstrap behavior will work for the vast majority of +users. See below for a few options that may be useful for some users, +or run ``cephadm bootstrap -h`` to see all available options: + +* Bootstrap writes the files needed to access the new cluster to + ``/etc/ceph`` for convenience, so that any Ceph packages installed + on the host itself (e.g., to access the command line interface) can + easily find them. 
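+
+  For example, a quick way to sanity-check the result of bootstrap is to
+  list that directory; the expected output simply reflects the files
+  described above::
+
+    # ls /etc/ceph
+    ceph.client.admin.keyring  ceph.conf  ceph.pub
+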
+ + Daemon containers deployed with cephadm, however, do not need + ``/etc/ceph`` at all. Use the ``--output-dir **`` option + to put them in a different directory (like ``.``), avoiding any + potential conflicts with existing Ceph configuration (cephadm or + otherwise) on the same host. + +* You can pass any initial Ceph configuration options to the new + cluster by putting them in a standard ini-style configuration file + and using the ``--config **`` option. + + +Enable Ceph CLI +=============== + +Cephadm does not require any Ceph packages to be installed on the +host. However, we recommend enabling easy access to the the ``ceph`` +command. There are several ways to do this: + +* The ``cephadm shell`` command launches a bash shell in a container + with all of the Ceph packages installed. By default, if + configuration and keyring files are found in ``/etc/ceph`` on the + host, they are passed into the container environment so that the + shell is fully functional:: + + # cephadm shell + +* It may be helpful to create an alias:: + + # alias ceph='cephadm shell --' + +* You can install the ``ceph-common`` package, which contains all of the + ceph commands, including ``ceph``, ``rbd``, ``mount.ceph`` (for mounting + CephFS file systems), etc.:: + + # cephadm add-repo --release octopus + # cephadm install ceph-common + +Confirm that the ``ceph`` command is accessible with:: + + # ceph -v + +Confirm that the ``ceph`` command can connect to the cluster and also +its status with:: + + # ceph status + + +Add hosts to the cluster +======================== + +To add each new host to the cluster, perform two steps: + +#. Install the cluster's public SSH key in the new host's root user's + ``authorized_keys`` file:: + + # ssh-copy-id -f -i ceph.pub root@** + + For example:: + + # ssh-copy-id -f -i ceph.pub root@host2 + # ssh-copy-id -f -i ceph.pub root@host3 + +#. Tell Ceph that the new node is part of the cluster:: + + # ceph orch host add *newhost* + + For example:: + + # ceph orch host add host2 + # ceph orch host add host3 + + +Deploy additional monitors (optional) +===================================== + +A typical Ceph cluster has three or five monitor daemons spread +across different hosts. We recommend deploying five +monitors if there are five or more nodes in your cluster. + +.. _CIDR: https://en.wikipedia.org/wiki/Classless_Inter-Domain_Routing#CIDR_notation + +When Ceph knows what IP subnet the monitors should use it can automatically +deploy and scale monitors as the cluster grows (or contracts). By default, +Ceph assumes that other monitors should use the same subnet as the first +monitor's IP. + +If your Ceph monitors (or the entire cluster) live on a single subnet, +then by default cephadm automatically adds up to 5 monitors as you add new +hosts to the cluster. No further steps are necessary. + +* If there is a specific IP subnet that should be used by monitors, you + can configure that in `CIDR`_ format (e.g., ``10.1.2.0/24``) with:: + + # ceph config set mon public_network ** + + For example:: + + # ceph config set mon public_network 10.1.2.0/24 + + Cephadm only deploys new monitor daemons on hosts that have IPs + configured in the configured subnet. + +* If you want to adjust the default of 5 monitors:: + + # ceph orch apply mon ** + +* To deploy monitors on a specific set of hosts:: + + # ceph orch apply mon ** + + Be sure to include the first (bootstrap) host in this list. + +* You can control which hosts the monitors run on by making use of + host labels. 
To set the ``mon`` label to the appropriate + hosts:: + + # ceph orch host label add ** mon + + To view the current hosts and labels:: + + # ceph orch host ls + + For example:: + + # ceph orch host label add host1 mon + # ceph orch host label add host2 mon + # ceph orch host label add host3 mon + # ceph orch host ls + HOST ADDR LABELS STATUS + host1 mon + host2 mon + host3 mon + host4 + host5 + + Tell cephadm to deploy monitors based on the label:: + + # ceph orch apply mon label:mon + +* You can explicitly specify the IP address or CIDR network for each monitor + and control where it is placed. To disable automated monitor deployment:: + + # ceph orch apply mon --unmanaged + + To deploy each additional monitor:: + + # ceph orch daemon add mon * [...]* + + For example, to deploy a second monitor on ``newhost1`` using an IP + address ``10.1.2.123`` and a third monitor on ``newhost2`` in + network ``10.1.2.0/24``:: + + # ceph orch apply mon --unmanaged + # ceph orch daemon add mon newhost1:10.1.2.123 + # ceph orch daemon add mon newhost2:10.1.2.0/24 + + +Deploy OSDs +=========== + +An inventory of storage devices on all cluster hosts can be displayed with:: + + # ceph orch device ls + +A storage device is considered *available* if all of the following +conditions are met: + +* The device must have no partitions. +* The device must not have any LVM state. +* The device must not be mounted. +* The device must not contain a file system. +* The device must not contain a Ceph BlueStore OSD. +* The device must be larger than 5 GB. + +Ceph refuses to provision an OSD on a device that is not available. + +There are a few ways to create new OSDs: + +* Tell Ceph to consume any available and unused storage device:: + + # ceph orch apply osd --all-available-devices + +* Create an OSD from a specific device on a specific host:: + + # ceph orch daemon add osd **:** + + For example:: + + # ceph orch daemon add osd host1:/dev/sdb + +* Use :ref:`drivegroups` to describe device(s) to consume + based on their properties, such device type (SSD or HDD), device + model names, size, or the hosts on which the devices exist:: + + # ceph orch osd create -i spec.yml + + +Deploy MDSs +=========== + +One or more MDS daemons is required to use the CephFS file system. +These are created automatically if the newer ``ceph fs volume`` +interface is used to create a new file system. For more information, +see :ref:`fs-volumes-and-subvolumes`. + +To deploy metadata servers:: + + # ceph orch apply mds ** ** [** ...] + +Deploy RGWs +=========== + +Cephadm deploys radosgw as a collection of daemons that manage a +particular *realm* and *zone*. (For more information about realms and +zones, see :ref:`multisite`.) To deploy a set of radosgw daemons for +a particular realm and zone:: + + # ceph orch apply rgw ** ** ** [** ...] + +Note that with cephadm, radosgw daemons are configured via the monitor +configuration database instead of via a `ceph.conf` or the command line. If +that confiruation isn't already in place (usually in the +``client.rgw..`` section), then the radosgw +daemons will start up with default settings (e.g., binding to port +80). diff --git a/doc/cephadm/monitoring.rst b/doc/cephadm/monitoring.rst index 0d4a16e0a5df0..31c93a3cce59b 100644 --- a/doc/cephadm/monitoring.rst +++ b/doc/cephadm/monitoring.rst @@ -5,7 +5,9 @@ The Ceph dashboard makes use of prometheus, grafana, and related tools to store and visualize detailed metrics on cluster utilization and performance. Ceph users have three options: -#. 
Have cephadm deploy and configure these services. +#. Have cephadm deploy and configure these services. This is the default + when bootstrapping a new cluster unless the ``--skip-monitoring-stack`` + option is used. #. Deploy and configure these services manually. This is recommended for users with existing prometheus services in their environment (and in cases where Ceph is running in Kubernetes with Rook). @@ -15,7 +17,10 @@ performance. Ceph users have three options: Deploying monitoring with cephadm --------------------------------- -To deploy a basic monitoring stack: +By default, bootstrap will deploy a basic monitoring stack. If you +did not do this (by passing ``--skip-monitoring-stack``, or if you +converted an existing cluster to cephadm management, you can set up +monitoring by following the steps below. #. Enable the prometheus module in the ceph-mgr daemon. This exposes the internal Ceph metrics so that prometheus can scrape them.:: @@ -52,6 +57,18 @@ completed, you should see something like this from ``ceph orch ls``:: node-exporter 2/2 6s ago docker.io/prom/node-exporter:latest e5a616e4b9cf present prometheus 1/1 6s ago docker.io/prom/prometheus:latest e935122ab143 present +Disabling monitoring +-------------------- + +If you have deployed monitoring and would like to remove it, you can do +so with:: + + ceph orch rm grafana + ceph orch rm prometheus --force # this will delete metrics data collected so far + ceph orch rm node-exporter + ceph orch rm alertmanager + ceph mgr module disable prometheus + Deploying monitoring manually ----------------------------- diff --git a/doc/cephadm/operations.rst b/doc/cephadm/operations.rst new file mode 100644 index 0000000000000..2e25874aabdf8 --- /dev/null +++ b/doc/cephadm/operations.rst @@ -0,0 +1,254 @@ +================== +Cephadm Operations +================== + +Watching cephadm log messages +============================= + +Cephadm logs to the ``cephadm`` cluster log channel, meaning you can +monitor progress in realtime with:: + + # ceph -W cephadm + +By default it will show info-level events and above. To see +debug-level messages too:: + + # ceph config set mgr mgr/cephadm/log_to_cluster_level debug + # ceph -W cephadm --watch-debug + +Be careful: the debug messages are very verbose! + +You can see recent events with:: + + # ceph log last cephadm + +These events are also logged to the ``ceph.cephadm.log`` file on +monitor hosts and to the monitor daemons' stderr. + + +Ceph daemon logs +================ + +Logging to stdout +----------------- + +Traditionally, Ceph daemons have logged to ``/var/log/ceph``. By +default, cephadm daemons log to stderr and the logs are +captured by the container runtime environment. For most systems, by +default, these logs are sent to journald and accessible via +``journalctl``. + +For example, to view the logs for the daemon ``mon.foo`` for a cluster +with ID ``5c5a50ae-272a-455d-99e9-32c6a013e694``, the command would be +something like:: + + journalctl -u ceph-5c5a50ae-272a-455d-99e9-32c6a013e694@mon.foo + +This works well for normal operations when logging levels are low. + +To disable logging to stderr:: + + ceph config set global log_to_stderr false + ceph config set global mon_cluster_log_to_stderr false + +Logging to files +---------------- + +You can also configure Ceph daemons to log to files instead of stderr, +just like they have in the past. When logging to files, Ceph logs appear +in ``/var/log/ceph/``. 
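+
+For example, with the cluster fsid used in the ``journalctl`` example above,
+the monitor's log would end up at a path like the following (the exact file
+name is illustrative)::
+
+    /var/log/ceph/5c5a50ae-272a-455d-99e9-32c6a013e694/ceph-mon.foo.log
+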
+ +To enable logging to files:: + + ceph config set global log_to_file true + ceph config set global mon_cluster_log_to_file true + +We recommend disabling logging to stderr (see above) or else everything +will be logged twice:: + + ceph config set global log_to_stderr false + ceph config set global mon_cluster_log_to_stderr false + +By default, cephadm sets up log rotation on each host to rotate these +files. You can configure the logging retention schedule by modifying +``/etc/logrotate.d/ceph.``. + + +Data location +============= + +Cephadm daemon data and logs in slightly different locations than older +versions of ceph: + +* ``/var/log/ceph/`` contains all cluster logs. Note + that by default cephadm logs via stderr and the container runtime, + so these logs are normally not present. +* ``/var/lib/ceph/`` contains all cluster daemon data + (besides logs). +* ``/var/lib/ceph//`` contains all data for + an individual daemon. +* ``/var/lib/ceph//crash`` contains crash reports for + the cluster. +* ``/var/lib/ceph//removed`` contains old daemon + data directories for stateful daemons (e.g., monitor, prometheus) + that have been removed by cephadm. + +Disk usage +---------- + +Because a few Ceph daemons may store a significant amount of data in +``/var/lib/ceph`` (notably, the monitors and prometheus), we recommend +moving this directory to its own disk, partition, or logical volume so +that it does not fill up the root file system. + + + +SSH Configuration +================= + +Cephadm uses SSH to connect to remote hosts. SSH uses a key to authenticate +with those hosts in a secure way. + + +Default behavior +---------------- + +Cephadm stores an SSH key in the monitor that is used to +connect to remote hosts. When the cluster is bootstrapped, this SSH +key is generated automatically and no additional configuration +is necessary. + +A *new* SSH key can be generated with:: + + ceph cephadm generate-key + +The public portion of the SSH key can be retrieved with:: + + ceph cephadm get-pub-key + +The currently stored SSH key can be deleted with:: + + ceph cephadm clear-key + +You can make use of an existing key by directly importing it with:: + + ceph config-key set mgr/cephadm/ssh_identity_key -i + ceph config-key set mgr/cephadm/ssh_identity_pub -i + +You will then need to restart the mgr daemon to reload the configuration with:: + + ceph mgr fail + + +Customizing the SSH configuration +--------------------------------- + +Cephadm generates an appropriate ``ssh_config`` file that is +used for connecting to remote hosts. This configuration looks +something like this:: + + Host * + User root + StrictHostKeyChecking no + UserKnownHostsFile /dev/null + +There are two ways to customize this configuration for your environment: + +#. Import a customized configuration file that will be stored + by the monitor with:: + + ceph cephadm set-ssh-config -i + + To remove a customized SSH config and revert back to the default behavior:: + + ceph cephadm clear-ssh-config + +#. You can configure a file location for the SSH configuration file with:: + + ceph config set mgr mgr/cephadm/ssh_config_file + + We do *not recommend* this approach. The path name must be + visible to *any* mgr daemon, and cephadm runs all daemons as + containers. That means that the file either need to be placed + inside a customized container image for your deployment, or + manually distributed to the mgr data directory + (``/var/lib/ceph//mgr.`` on the host, visible at + ``/var/lib/ceph/mgr/ceph-`` from inside the container). 
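+
+As an illustration of the first approach, the following sketch stores a
+customized configuration that forces a non-default SSH port (the port
+number here is only an example)::
+
+    cat > ssh_config << EOF
+    Host *
+      User root
+      Port 2222
+      StrictHostKeyChecking no
+      UserKnownHostsFile /dev/null
+    EOF
+    ceph cephadm set-ssh-config -i ssh_config
+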
+ + +Health checks +============= + +CEPHADM_PAUSED +-------------- + +Cephadm background work has been paused with ``ceph orch pause``. Cephadm +continues to perform passive monitoring activities (like checking +host and daemon status), but it will not make any changes (like deploying +or removing daemons). + +Resume cephadm work with:: + + ceph orch resume + +CEPHADM_STRAY_HOST +------------------ + +One or more hosts have running Ceph daemons but are not registered as +hosts managed by *cephadm*. This means that those services cannot +currently be managed by cephadm (e.g., restarted, upgraded, included +in `ceph orch ps`). + +You can manage the host(s) with:: + + ceph orch host add ** + +Note that you may need to configure SSH access to the remote host +before this will work. + +Alternatively, you can manually connect to the host and ensure that +services on that host are removed or migrated to a host that is +managed by *cephadm*. + +You can also disable this warning entirely with:: + + ceph config set mgr mgr/cephadm/warn_on_stray_hosts false + +CEPHADM_STRAY_DAEMON +-------------------- + +One or more Ceph daemons are running but not are not managed by +*cephadm*. This may be because they were deployed using a different +tool, or because they were started manually. Those +services cannot currently be managed by cephadm (e.g., restarted, +upgraded, or included in `ceph orch ps`). + +If the daemon is a stateful one (monitor or OSD), it should be adopted +by cephadm; see :ref:`cephadm-adoption`. For stateless daemons, it is +usually easiest to provision a new daemon with the ``ceph orch apply`` +command and then stop the unmanaged daemon. + +This warning can be disabled entirely with:: + + ceph config set mgr mgr/cephadm/warn_on_stray_daemons false + +CEPHADM_HOST_CHECK_FAILED +------------------------- + +One or more hosts have failed the basic cephadm host check, which verifies +that (1) the host is reachable and cephadm can be executed there, and (2) +that the host satisfies basic prerequisites, like a working container +runtime (podman or docker) and working time synchronization. +If this test fails, cephadm will no be able to manage services on that host. + +You can manually run this check with:: + + ceph cephadm check-host ** + +You can remove a broken host from management with:: + + ceph orch host rm ** + +You can disable this health warning with:: + + ceph config set mgr mgr/cephadm/warn_on_failed_host_check false diff --git a/doc/cephadm/troubleshooting.rst b/doc/cephadm/troubleshooting.rst new file mode 100644 index 0000000000000..375420ad18282 --- /dev/null +++ b/doc/cephadm/troubleshooting.rst @@ -0,0 +1,71 @@ + +Troubleshooting +=============== + +Sometimes there is a need to investigate why a cephadm command failed or why +a specific service no longer runs properly. + +As cephadm deploys daemons as containers, troubleshooting daemons is slightly +different. Here are a few tools and commands to help investigating issues. + +Gathering log files +------------------- + +Use journalctl to gather the log files of all daemons: + +.. note:: By default cephadm now stores logs in journald. This means + that you will no longer find daemon logs in ``/var/log/ceph/``. + +To read the log file of one specific daemon, run:: + + cephadm logs --name + +Note: this only works when run on the same host where the daemon is running. 
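+
+For example, to view the logs of a monitor daemon running on the local host
+(the daemon name is illustrative; use a name reported by ``cephadm ls``)::
+
+    cephadm logs --name mon.host1
+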
To +get logs of a daemon running on a different host, give the ``--fsid`` option:: + + cephadm logs --fsid --name + +where the ```` corresponds to the cluster ID printed by ``ceph status``. + +To fetch all log files of all daemons on a given host, run:: + + for name in $(cephadm ls | jq -r '.[].name') ; do + cephadm logs --fsid --name "$name" > $name; + done + +Collecting systemd status +------------------------- + +To print the state of a systemd unit, run:: + + systemctl status "ceph-$(cephadm shell ceph fsid)@.service"; + + +To fetch all state of all daemons of a given host, run:: + + fsid="$(cephadm shell ceph fsid)" + for name in $(cephadm ls | jq -r '.[].name') ; do + systemctl status "ceph-$fsid@$name.service" > $name; + done + + +List all downloaded container images +------------------------------------ + +To list all container images that are downloaded on a host: + +.. note:: ``Image`` might also be called `ImageID` + +:: + + podman ps -a --format json | jq '.[].Image' + "docker.io/library/centos:8" + "registry.opensuse.org/opensuse/leap:15.2" + + +Manually running containers +--------------------------- + +Cephadm writes small wrappers that run a containers. Refer to +``/var/lib/ceph///unit.run`` for the +container execution command. diff --git a/doc/cephadm/upgrade.rst b/doc/cephadm/upgrade.rst new file mode 100644 index 0000000000000..9ffa68f65df93 --- /dev/null +++ b/doc/cephadm/upgrade.rst @@ -0,0 +1,118 @@ +============== +Upgrading Ceph +============== + +Cephadm is capable of safely upgrading Ceph from one bugfix release to +another. For example, you can upgrade from v15.2.0 (the first Octopus +release) to the next point release v15.2.1. + +The automated upgrade process follows Ceph best practices. For example: + +* The upgrade order starts with managers, monitors, then other daemons. +* Each daemon is restarted only after Ceph indicates that the cluster + will remain available. + +Keep in mind that the Ceph cluster health status is likely to switch to +`HEALTH_WARNING` during the upgrade. + + +Starting the upgrade +==================== + +Before you start, you should verify that all hosts are currently online +and your cluster is healthy. + +:: + + # ceph -s + +To upgrade (or downgrade) to a specific release:: + + # ceph upgrade start --version + +For example, to upgrade to v15.2.1:: + + # ceph upgrade start --version 15.2.1 + + +Monitoring the upgrade +====================== + +Determine whether an upgrade is in process and what version the cluster is +upgrading to with:: + + # ceph upgrade status + +While the upgrade is underway, you will see a progress bar in the ceph +status output. For example:: + + # ceph -s + [...] + progress: + Upgrade to docker.io/ceph/ceph:v15.2.1 (00h 20m 12s) + [=======.....................] (time remaining: 01h 43m 31s) + +You can also watch the cephadm log with:: + + # ceph -W cephadm + + +Canceling an upgrade +==================== + +You can stop the upgrade process at any time with:: + + # ceph upgrade stop + + +Potential problems +================== + +There are a few health alerts that can arise during the upgrade process. + +UPGRADE_NO_STANDBY_MGR +---------------------- + +Ceph requires an active and standby manager daemon in order to proceed, but +there is currently no standby. 
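+
+In this state the ``mgr:`` line of ``ceph -s`` shows an active manager but
+no standbys (the output below is only a sketch with a made-up daemon name)::
+
+    # ceph -s | grep mgr
+    mgr: host1.abcdef(active)
+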
+ +You can ensure that Cephadm is configured to run 2 (or more) managers with:: + + # ceph orch apply mgr 2 # or more + +You can check the status of existing mgr daemons with:: + + # ceph orch ps --daemon-type mgr + +If an existing mgr daemon has stopped, you can try restarting it with:: + + # ceph orch daemon restart + +UPGRADE_FAILED_PULL +------------------- + +Ceph was unable to pull the container image for the target version. +This can happen if you specify an version or container image that does +not exist (e.g., 1.2.3), or if the container registry is not reachable from +one or more hosts in the cluster. + +You can cancel the existing upgrade and specify a different target version with:: + + # ceph upgrade stop + # ceph upgrade start --version + + +Using customized container images +================================= + +For most users, simplify specifying the Ceph version is sufficient. +Cephadm will locate the specific Ceph container image to use by +combining the ``container_image_base`` configuration option (default: +``docker.io/ceph/ceph``) with a tag of ``vX.Y.Z``. + +You can also upgrade to an arbitrary container image. For example, to +upgrade to a development build:: + + # ceph upgrade start --image quay.io/ceph-ci/ceph:recent-git-branch-name + +For more information about available container images, see :ref:`containers`. diff --git a/doc/cephfs/fs-volumes.rst b/doc/cephfs/fs-volumes.rst index 052f4f5d89e76..62eb4ec0e5b74 100644 --- a/doc/cephfs/fs-volumes.rst +++ b/doc/cephfs/fs-volumes.rst @@ -258,5 +258,28 @@ only unprotected snapshots can be removed. This guarantees that a snapshot canno .. note:: Cloning only synchronizes directories, regular files and symbolic links. Also, inode timestamps (access and modification times) are synchronized upto seconds granularity. +An `in-progress` or a `pending` clone operation can be canceled. To cancel a clone operation use the `clone cancel` command:: + + $ ceph fs clone cancel [--group_name ] + +On successful cancelation, the cloned subvolume is moved to `canceled` state:: + + $ ceph fs subvolume snapshot protect cephfs subvol1 snap1 + $ ceph fs subvolume snapshot clone cephfs subvol1 snap1 clone1 + $ ceph fs clone cancel cephfs clone1 + $ ceph fs clone status cephfs clone1 + { + "status": { + "state": "canceled", + "source": { + "volume": "cephfs", + "subvolume": "subvol1", + "snapshot": "snap1" + } + } + } + +.. note:: The canceled cloned can be deleted by using --force option in `fs subvolume rm` command. + .. _manila: https://github.com/openstack/manila .. _CSI: https://github.com/ceph/ceph-csi diff --git a/doc/index.rst b/doc/index.rst index af3b49c6c33cf..fe10a9f288c96 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -92,8 +92,8 @@ about Ceph, see our `Architecture`_ section. :hidden: start/intro - cephadm/index install/index + cephadm/index rados/index cephfs/index rbd/index diff --git a/doc/install/containers.rst b/doc/install/containers.rst index ab29c0e3ad260..c819c646dd0c5 100644 --- a/doc/install/containers.rst +++ b/doc/install/containers.rst @@ -1,3 +1,5 @@ +.. _containers: + Ceph Container Images ===================== diff --git a/doc/install/index.rst b/doc/install/index.rst index 69a48a6d003c1..507247ca90b26 100644 --- a/doc/install/index.rst +++ b/doc/install/index.rst @@ -4,109 +4,66 @@ Installing Ceph =============== -There are various options for installing Ceph. Review the documention for each method before choosing the one that best serves your needs. +There are several different ways to install Ceph. 
Choose the +method that best suites your needs. -We recommend the following installation methods: +Recommended methods +~~~~~~~~~~~~~~~~~~~ - * cephadm - * Rook +:ref:`Cephadm ` installs and manages a Ceph cluster using containers and +systemd, with tight integration with the CLI and dashboard GUI. +* cephadm only supports only Octopus and newer releases. +* cephadm is fully integrated with the new orchestration API and + fully supports the new CLI and dashboard features to manage + cluster deployment. +* cephadm requires container support (podman or docker) and + Python 3. -We offer these other methods of installation in addition to the ones we recommend: +`Rook `_ deploys and manages Ceph clusters running +in Kubernetes, while also enabling management of storage resources and +provisioning via Kubernetes APIs. We recommend Rook as the way to run Ceph in +Kubernetes or to connect an existing Ceph storage cluster to Kubernetes. - * ceph-ansible - * ceph-deploy (no longer actively maintained) - * Deepsea (Salt) - * Juju - * Manual installation (using packages) - * Puppet +* Rook only supports Nautilus and newer releases of Ceph. +* Rook is the preferred method for running Ceph on Kubernetes, or for + connecting a Kubernetes cluster to an existing (external) Ceph + cluster. +* Rook supports the new orchestrator API. New management features + in the CLI and dashboard are fully supported. +Other methods +~~~~~~~~~~~~~ -Recommended Methods of Ceph Installation -======================================== +`ceph-ansible `_ deploys and manages +Ceph clusters using Ansible. -cephadm -------- +* ceph-ansible is widely deployed. +* ceph-ansible is not integrated with the new orchestrator APIs, + introduced in Nautlius and Octopus, which means that newer + management features and dashboard integration are not available. -Installs Ceph using containers and systemd. -* :ref:`cephadm-bootstrap` - - * cephadm is supported only on Octopus and newer releases. - * cephadm is fully integrated with the new orcehstration API and fully supports the new CLI and dashboard features to manage cluster deployment. - * cephadm requires container support (podman or docker) and Python 3. - -Rook ----- - -Installs Ceph in Kubernetes. - -* `rook.io `_ - - * Rook supports only Nautilus and newer releases of Ceph. - * Rook is the preferred deployment method for Ceph with Kubernetes. - * Rook fully suports the new orchestrator API. New management features in the CLI and dashboard are fully supported. - -Other Methods of Ceph Installation -================================== - -ceph-ansible ------------- - -Installs Ceph using Ansible. - -* `docs.ceph.com/ceph-ansible `_ - -ceph-deploy ------------ - -Install ceph using ceph-deploy - -* :ref:`ceph-deploy-index` +:ref:`ceph-deploy ` is a tool for quickly deploying clusters. .. IMPORTANT:: - ceph-deploy is no longer actively maintained. It is not tested on versions of Ceph newer than Nautilus. It does not support RHEL8, CentOS 8, or newer operating systems. -.. toctree:: - :hidden: - - ceph-deploy/index - - -DeepSea -------- - -Install Ceph using Salt - -* `github.com/SUSE/DeepSea `_ - -Juju ----- +`DeepSea `_ installs Ceph using Salt. -Installs Ceph using Juju. +`jaas.ai/ceph-mon `_ installs Ceph using Juju. -* `jaas.ai/ceph-mon `_ +`github.com/openstack/puppet-ceph `_ installs Ceph via Puppet. +Ceph can also be :ref:`installed manually `. -Manual ------- - -Manually install Ceph using packages. - -* :ref:`install-manual` .. 
toctree:: :hidden: - - index_manual -Puppet ------- - -Installs Ceph using Puppet + index_manual + ceph-deploy/index -* `github.com/openstack/puppet-ceph `_ diff --git a/doc/releases/octopus.rst b/doc/releases/octopus.rst index c9968502d2ec4..b8dff086e8fea 100644 --- a/doc/releases/octopus.rst +++ b/doc/releases/octopus.rst @@ -14,7 +14,7 @@ General * A new deployment tool called **cephadm** has been introduced that integrates Ceph daemon deployment and management via containers into the orchestration layer. For more information see - :ref:`cephadm-bootstrap`. + :ref:`cephadm`. * Health alerts can now be muted, either temporarily or permanently. * A simple 'alerts' capability has been introduced to send email health alerts for clusters deployed without the benefit of an @@ -113,13 +113,21 @@ RADOS **RBD** block storage ~~~~~~~~~~~~~~~~~~~~~ +* Mirroring now supports a new snapshot-based mode that no longer requires + the journaling feature and its related impacts in exchange for the loss + of point-in-time consistency (it remains crash consistent). * Clone operations now preserve the sparseness of the underlying RBD image. * The trash feature has been improved to (optionally) automatically move old parent images to the trash when their children are all deleted or flattened. +* The trash can be configured to automatically purge on a defined schedule. +* Images can be online re-sparsified to reduce the usage of zeroed extents. * The ``rbd-nbd`` tool has been improved to use more modern kernel interfaces. * Caching has been improved to be more efficient and performant. - +* ``rbd-mirror`` automatically adjusts its per-image memory usage based + upon its memory target. +* A new persistent read-only caching daemon is available to offload reads from + shared parent images. 
**RGW** object storage ~~~~~~~~~~~~~~~~~~~~~~ diff --git a/qa/suites/rados/cephadm/upgrade/1-start.yaml b/qa/suites/rados/cephadm/upgrade/1-start.yaml index 25e6ee90e8502..6e974cef32f51 100644 --- a/qa/suites/rados/cephadm/upgrade/1-start.yaml +++ b/qa/suites/rados/cephadm/upgrade/1-start.yaml @@ -1,4 +1,4 @@ tasks: - cephadm: - image: quay.io/ceph-ci/ceph:wip-sage3-testing-2020-03-14-0747 - cephadm_branch: wip-sage3-testing-2020-03-14-0747 + image: quay.io/ceph-ci/ceph:wip-sage-testing-2020-03-16-1740 + cephadm_branch: wip-sage-testing-2020-03-16-1740 diff --git a/qa/suites/rados/verify/ceph.yaml b/qa/suites/rados/verify/ceph.yaml index ed4f5a5bc9112..fc5ce350ad6a4 100644 --- a/qa/suites/rados/verify/ceph.yaml +++ b/qa/suites/rados/verify/ceph.yaml @@ -1,8 +1,6 @@ overrides: ceph: conf: - global: - osd heartbeat grace: 60 mon: mon min osdmap epochs: 50 paxos service trim min: 10 @@ -12,8 +10,6 @@ overrides: mon osdmap full prune txsize: 2 osd: debug monc: 20 - debug ms: 1 - debug osd: 20 tasks: - install: - ceph: diff --git a/qa/suites/rados/verify/tasks/rados_api_tests.yaml b/qa/suites/rados/verify/tasks/rados_api_tests.yaml index d61c6f8b1297d..79f24479ae382 100644 --- a/qa/suites/rados/verify/tasks/rados_api_tests.yaml +++ b/qa/suites/rados/verify/tasks/rados_api_tests.yaml @@ -23,6 +23,8 @@ overrides: tasks: - workunit: timeout: 6h + env: + ALLOW_TIMEOUTS: "1" clients: client.0: - rados/test.sh diff --git a/qa/suites/rados/verify/validater/valgrind.yaml b/qa/suites/rados/verify/validater/valgrind.yaml index fe8e0e1cac045..83eb2add54300 100644 --- a/qa/suites/rados/verify/validater/valgrind.yaml +++ b/qa/suites/rados/verify/validater/valgrind.yaml @@ -9,18 +9,21 @@ overrides: ceph: conf: global: - osd heartbeat grace: 40 - debug refs: 5 + osd heartbeat grace: 80 mon: mon osd crush smoke test: false osd: osd fast shutdown: false + debug bluestore: 1 + debug bluefs: 1 log-whitelist: - overall HEALTH_ # valgrind is slow.. we might get PGs stuck peering etc - \(PG_ # mons sometimes are left off of initial quorum due to valgrind slowness. ok to whitelist here because we'll still catch an actual crash due to the core - \(MON_DOWN\) + - \(SLOW_OPS\) + - slow request valgrind: mon: [--tool=memcheck, --leak-check=full, --show-reachable=yes] osd: [--tool=memcheck] diff --git a/qa/tasks/cephadm.py b/qa/tasks/cephadm.py index 8f7947571fc6e..2b076053a784c 100644 --- a/qa/tasks/cephadm.py +++ b/qa/tasks/cephadm.py @@ -353,11 +353,13 @@ def ceph_bootstrap(ctx, config): 'sudo', ctx.cephadm, '--image', ctx.ceph[cluster_name].image, + '-v', 'bootstrap', '--fsid', fsid, '--mon-id', first_mon, '--mgr-id', first_mgr, '--orphan-initial-daemons', # we will do it explicitly! 
+ '--skip-monitoring-stack', # we'll provision these explicitly '--config', '{}/seed.{}.conf'.format(testdir, cluster_name), '--output-config', '/etc/ceph/{}.conf'.format(cluster_name), '--output-keyring', @@ -411,6 +413,10 @@ def ceph_bootstrap(ctx, config): 'sudo', 'chmod', '0600', '/root/.ssh/authorized_keys', ]) + # set options + _shell(ctx, cluster_name, bootstrap_remote, + ['ceph', 'config', 'set', 'mgr', 'mgr/cephadm/allow_ptrace', 'true']) + # add other hosts for remote in ctx.cluster.remotes.keys(): if remote == bootstrap_remote: diff --git a/qa/tasks/cephfs/test_volumes.py b/qa/tasks/cephfs/test_volumes.py index ea8bc937eb204..6f5bdec237a3c 100644 --- a/qa/tasks/cephfs/test_volumes.py +++ b/qa/tasks/cephfs/test_volumes.py @@ -49,6 +49,9 @@ def _wait_for_clone_to_complete(self, clone, clone_group=None, timo=120): def _wait_for_clone_to_fail(self, clone, clone_group=None, timo=120): self.__check_clone_state("failed", clone, clone_group, timo) + def _check_clone_canceled(self, clone, clone_group=None): + self.__check_clone_state("canceled", clone, clone_group, timo=1) + def _verify_clone_attrs(self, subvolume, clone, source_group=None, clone_group=None): path1 = self._get_subvolume_path(self.volname, subvolume, group_name=source_group) path2 = self._get_subvolume_path(self.volname, clone, group_name=clone_group) @@ -2126,3 +2129,115 @@ def test_subvolume_snapshot_attr_clone(self): # verify trash dir is clean self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_cancel_in_progress(self): + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clone = self._generate_random_clone_name() + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=128) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # now, protect snapshot + self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot) + + # schedule a clone + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + # cancel on-going clone + self._fs_cmd("clone", "cancel", self.volname, clone) + + # verify canceled state + self._check_clone_canceled(clone) + + # now, unprotect snapshot + self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + self._fs_cmd("subvolume", "rm", self.volname, clone, "--force") + + # verify trash dir is clean + self._wait_for_trash_empty() + + def test_subvolume_snapshot_clone_cancel_pending(self): + """ + this test is a bit more involved compared to canceling an in-progress clone. + we'd need to ensure that a to-be canceled clone has still not been picked up + by cloner threads. exploit the fact that clones are picked up in an FCFS + fashion and there are four (4) cloner threads by default. When the number of + cloner threads increase, this test _may_ start tripping -- so, the number of + clone operations would need to be jacked up. 
+ """ + # default number of clone threads + NR_THREADS = 4 + # good enough for 4 threads + NR_CLONES = 5 + # yeh, 1gig -- we need the clone to run for sometime + FILE_SIZE_MB = 1024 + + subvolume = self._generate_random_subvolume_name() + snapshot = self._generate_random_snapshot_name() + clones = self._generate_random_clone_name(NR_CLONES) + + # create subvolume + self._fs_cmd("subvolume", "create", self.volname, subvolume) + + # do some IO + self._do_subvolume_io(subvolume, number_of_files=4, file_size=FILE_SIZE_MB) + + # snapshot subvolume + self._fs_cmd("subvolume", "snapshot", "create", self.volname, subvolume, snapshot) + + # now, protect snapshot + self._fs_cmd("subvolume", "snapshot", "protect", self.volname, subvolume, snapshot) + + # schedule clones + for clone in clones: + self._fs_cmd("subvolume", "snapshot", "clone", self.volname, subvolume, snapshot, clone) + + to_wait = clones[0:NR_THREADS] + to_cancel = clones[NR_THREADS:] + + # cancel pending clones and verify + for clone in to_cancel: + status = json.loads(self._fs_cmd("clone", "status", self.volname, clone)) + self.assertEqual(status["status"]["state"], "pending") + self._fs_cmd("clone", "cancel", self.volname, clone) + self._check_clone_canceled(clone) + + # let's cancel on-going clones. handle the case where some of the clones + # _just_ complete + for clone in list(to_wait): + try: + self._fs_cmd("clone", "cancel", self.volname, clone) + to_cancel.append(clone) + to_wait.remove(clone) + except CommandFailedError as ce: + if ce.exitstatus != errno.EINVAL: + raise RuntimeError("invalid error code when cancelling on-going clone") + + # now, unprotect snapshot + self._fs_cmd("subvolume", "snapshot", "unprotect", self.volname, subvolume, snapshot) + + # remove snapshot + self._fs_cmd("subvolume", "snapshot", "rm", self.volname, subvolume, snapshot) + + # remove subvolumes + self._fs_cmd("subvolume", "rm", self.volname, subvolume) + for clone in to_wait: + self._fs_cmd("subvolume", "rm", self.volname, clone) + for clone in to_cancel: + self._fs_cmd("subvolume", "rm", self.volname, clone, "--force") + + # verify trash dir is clean + self._wait_for_trash_empty() diff --git a/qa/tasks/mgr/dashboard/test_osd.py b/qa/tasks/mgr/dashboard/test_osd.py index 0c17be4f6a104..1bd75e4b09472 100644 --- a/qa/tasks/mgr/dashboard/test_osd.py +++ b/qa/tasks/mgr/dashboard/test_osd.py @@ -111,8 +111,10 @@ def test_create_lost_destroy_remove(self): def test_create_with_drive_group(self): data = { 'method': 'drive_groups', - 'data': { - 'test': { + 'data': [ + { + 'service_type': 'osd', + 'service_id': 'test', 'host_pattern': '*', 'data_devices': { 'vendor': 'abc', @@ -136,7 +138,7 @@ def test_create_with_drive_group(self): 'db_slots': 5, 'encrypted': True } - }, + ], 'tracking_id': 'test' } self._post('/api/osd', data) diff --git a/qa/workunits/cephadm/test_cephadm.sh b/qa/workunits/cephadm/test_cephadm.sh index 7ce4eaf1e534f..b6c6cad306cfb 100755 --- a/qa/workunits/cephadm/test_cephadm.sh +++ b/qa/workunits/cephadm/test_cephadm.sh @@ -6,7 +6,7 @@ SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" FSID='00000000-0000-0000-0000-0000deadbeef' # images that are used -IMAGE_MASTER=${IMAGE_MASTER:-'docker.io/ceph/daemon-base:latest-master-devel'} +IMAGE_MASTER=${IMAGE_MASTER:-'quay.io/ceph-ci/ceph:octopus'} # octopus for octopus branch IMAGE_NAUTILUS=${IMAGE_NAUTILUS:-'docker.io/ceph/daemon-base:latest-nautilus'} IMAGE_MIMIC=${IMAGE_MIMIC:-'docker.io/ceph/daemon-base:latest-mimic'} @@ -190,7 +190,10 @@ $CEPHADM bootstrap \ --config 
$ORIG_CONFIG \ --output-config $CONFIG \ --output-keyring $KEYRING \ - --allow-overwrite + --output-pub-ssh-key $TMPDIR/ceph.pub \ + --allow-overwrite \ + --skip-mon-network \ + --skip-monitoring-stack test -e $CONFIG test -e $KEYRING rm -f $ORIG_CONFIG @@ -211,6 +214,11 @@ systemctl | grep system-ceph | grep -q .slice # naming is escaped and annoying $CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \ ceph -s | grep $FSID +for t in mon mgr node-exporter prometheus grafana; do + $CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \ + ceph orch apply $t --unmanaged +done + ## ls $CEPHADM ls | jq '.[]' | jq 'select(.name == "mon.a").fsid' \ | grep $FSID @@ -299,6 +307,8 @@ $CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \ ceph osd pool create $nfs_rados_pool 64 $CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \ rados --pool nfs-ganesha --namespace nfs-ns create conf-nfs.a +$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \ + ceph orch pause $CEPHADM deploy --name nfs.a \ --fsid $FSID \ --keyring $KEYRING \ @@ -306,6 +316,8 @@ $CEPHADM deploy --name nfs.a \ --config-json ${CEPHADM_SAMPLES_DIR}/nfs.json cond="$SUDO ss -tlnp '( sport = :nfs )' | grep 'ganesha.nfsd'" is_available "nfs" "$cond" 10 +$CEPHADM shell --fsid $FSID --config $CONFIG --keyring $KEYRING -- \ + ceph orch resume ## run # WRITE ME diff --git a/qa/workunits/rbd/permissions.sh b/qa/workunits/rbd/permissions.sh index 68144d2b9402e..f8a9aaa7128ef 100755 --- a/qa/workunits/rbd/permissions.sh +++ b/qa/workunits/rbd/permissions.sh @@ -168,12 +168,11 @@ create_self_managed_snapshot() { cat << EOF | CEPH_ARGS="-k $KEYRING" python3 import rados -cluster = rados.Rados(conffile="", rados_id="${ID}") -cluster.connect() -ioctx = cluster.open_ioctx("${POOL}") +with rados.Rados(conffile="", rados_id="${ID}") as cluster: + ioctx = cluster.open_ioctx("${POOL}") -snap_id = ioctx.create_self_managed_snap() -print ("Created snap id {}".format(snap_id)) + snap_id = ioctx.create_self_managed_snap() + print ("Created snap id {}".format(snap_id)) EOF } @@ -184,19 +183,17 @@ remove_self_managed_snapshot() { cat << EOF | CEPH_ARGS="-k $KEYRING" python3 import rados -cluster1 = rados.Rados(conffile="", rados_id="mon_write") -cluster1.connect() -ioctx1 = cluster1.open_ioctx("${POOL}") +with rados.Rados(conffile="", rados_id="mon_write") as cluster1, \ + rados.Rados(conffile="", rados_id="${ID}") as cluster2: + ioctx1 = cluster1.open_ioctx("${POOL}") -snap_id = ioctx1.create_self_managed_snap() -print ("Created snap id {}".format(snap_id)) + snap_id = ioctx1.create_self_managed_snap() + print ("Created snap id {}".format(snap_id)) -cluster2 = rados.Rados(conffile="", rados_id="${ID}") -cluster2.connect() -ioctx2 = cluster2.open_ioctx("${POOL}") + ioctx2 = cluster2.open_ioctx("${POOL}") -ioctx2.remove_self_managed_snap(snap_id) -print ("Removed snap id {}".format(snap_id)) + ioctx2.remove_self_managed_snap(snap_id) + print ("Removed snap id {}".format(snap_id)) EOF } diff --git a/src/cephadm/cephadm b/src/cephadm/cephadm index 1357f1e210a63..af145a459bb9a 100755 --- a/src/cephadm/cephadm +++ b/src/cephadm/cephadm @@ -1,6 +1,7 @@ #!/usr/bin/python3 -DEFAULT_IMAGE='docker.io/ceph/daemon-base:latest-master-devel' # FIXME when octopus is ready!!! 
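The ``qa/workunits/rbd/permissions.sh`` hunk above moves the embedded Python from explicit ``connect()`` calls to context-managed ``rados.Rados`` handles, so the cluster connection is torn down even when the snapshot calls raise. A standalone sketch of the same pattern, with a placeholder conffile path, client id, and pool name::

    import rados

    # Rados.__enter__ connects and __exit__ shuts the handle down, so no
    # explicit connect()/shutdown() is needed (this mirrors the workunit's
    # pattern; the path and names here are placeholders).
    with rados.Rados(conffile='/etc/ceph/ceph.conf', rados_id='admin') as cluster:
        ioctx = cluster.open_ioctx('rbd')
        try:
            snap_id = ioctx.create_self_managed_snap()
            print('created self-managed snap id {}'.format(snap_id))
            ioctx.remove_self_managed_snap(snap_id)
            print('removed self-managed snap id {}'.format(snap_id))
        finally:
            ioctx.close()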
+#DEFAULT_IMAGE='docker.io/ceph/ceph:v15.2' +DEFAULT_IMAGE='quay.io/ceph-ci/ceph:octopus' DATA_DIR='/var/lib/ceph' LOG_DIR='/var/log/ceph' LOCK_DIR='/run/cephadm' @@ -1422,9 +1423,11 @@ def get_container_mounts(fsid, daemon_type, daemon_id, return mounts -def get_container(fsid, daemon_type, daemon_id, privileged=False, +def get_container(fsid, daemon_type, daemon_id, + privileged=False, + ptrace=False, container_args=[]): - # type: (str, str, Union[int, str], bool, List[str]) -> CephContainer + # type: (str, str, Union[int, str], bool, bool, List[str]) -> CephContainer if daemon_type in ['mon', 'osd']: # mon and osd need privileged in order for libudev to query devices privileged = True @@ -1484,6 +1487,7 @@ def get_container(fsid, daemon_type, daemon_id, privileged=False, cname='ceph-%s-%s.%s' % (fsid, daemon_type, daemon_id), envs=envs, privileged=privileged, + ptrace=ptrace, ) def extract_uid_gid(img='', file_path='/var/lib/ceph'): @@ -1838,16 +1842,18 @@ class CephContainer: cname='', container_args=[], envs=None, - privileged=False): - # type: (str, str, List[str], Dict[str, str], str, List[str], Optional[List[str]], Optional[bool]) -> None + privileged=False, + ptrace=False): + # type: (str, str, List[str], Dict[str, str], str, List[str], Optional[List[str]], bool, bool) -> None self.image = image self.entrypoint = entrypoint self.args = args self.volume_mounts = volume_mounts self.cname = cname self.container_args = container_args - self.privileged = privileged self.envs = envs + self.privileged = privileged + self.ptrace = ptrace def run_cmd(self): # type: () -> List[str] @@ -1863,6 +1869,8 @@ class CephContainer: priv = ['--privileged', # let OSD etc read block devs that haven't been chowned '--group-add=disk'] + if self.ptrace: + priv.append('--cap-add=SYS_PTRACE') vols = sum( [['-v', '%s:%s' % (host_dir, container_dir)] for host_dir, container_dir in self.volume_mounts.items()], []) @@ -2010,6 +2018,7 @@ def command_bootstrap(): # ip r = re.compile(r':(\d+)$') + base_ip = None if args.mon_ip: hasport = r.findall(args.mon_ip) if hasport: @@ -2022,8 +2031,10 @@ def command_bootstrap(): logger.warning('Using msgr2 protocol for unrecognized port %d' % port) addr_arg = '[v2:%s]' % args.mon_ip - check_ip_port(args.mon_ip[0:-(len(str(port)))-1], port) + base_ip = args.mon_ip[0:-(len(str(port)))-1] + check_ip_port(base_ip, port) else: + base_ip = args.mon_ip addr_arg = '[v2:%s:3300,v1:%s:6789]' % (args.mon_ip, args.mon_ip) check_ip_port(args.mon_ip, 3300) check_ip_port(args.mon_ip, 6789) @@ -2040,10 +2051,25 @@ def command_bootstrap(): port = int(hasport[0]) # strip off v1: or v2: prefix addr = re.sub(r'^\w+:', '', addr) - check_ip_port(addr[0:-(len(str(port)))-1], port) + base_ip = addr[0:-(len(str(port)))-1] + check_ip_port(base_ip, port) else: raise Error('must specify --mon-ip or --mon-addrv') - logger.debug('Final addrv is %s' % addr_arg) + logger.debug('Base mon IP is %s, final addrv is %s' % (base_ip, addr_arg)) + + mon_network = None + if not args.skip_mon_network: + # make sure IP is configured locally, and then figure out the + # CIDR network + for net, ips in list_networks().items(): + if base_ip in ips: + mon_network = net + logger.info('Mon IP %s is in CIDR network %s' % (base_ip, + mon_network)) + break + if not mon_network: + raise Error('Failed to infer CIDR network for mon ip %s; pass ' + '--skip-mon-network to configure it later' % base_ip) # config cp = read_config(args.config) @@ -2226,6 +2252,10 @@ def command_bootstrap(): get_unit_name(fsid, 'mon', mon_id) ]) + if 
mon_network: + logger.info('Setting mon public_network...') + cli(['config', 'set', 'mon', 'public_network', mon_network]) + # create mgr logger.info('Creating mgr...') mgr_keyring = '[mgr.%s]\n\tkey = %s\n' % (mgr_id, mgr_key) @@ -2317,6 +2347,13 @@ def command_bootstrap(): logger.info('Deploying %s service with default placement...' % t) cli(['orch', 'apply', t]) + if not args.skip_monitoring_stack: + logger.info('Enabling mgr prometheus module...') + cli(['mgr', 'module', 'enable', 'prometheus']) + for t in ['prometheus', 'grafana', 'node-exporter', 'alertmanager']: + logger.info('Deploying %s service with default placement...' % t) + cli(['orch', 'apply', t]) + if not args.skip_dashboard: logger.info('Enabling the dashboard module...') cli(['mgr', 'module', 'enable', 'dashboard']) @@ -2399,7 +2436,8 @@ def command_deploy(): (config, keyring) = get_config_and_keyring() (uid, gid) = extract_uid_gid() make_var_run(args.fsid, uid, gid) - c = get_container(args.fsid, daemon_type, daemon_id) + c = get_container(args.fsid, daemon_type, daemon_id, + ptrace=args.allow_ptrace) deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid, config=config, keyring=keyring, osd_fsid=args.osd_fsid, @@ -2624,6 +2662,39 @@ def command_logs(): ################################## +def list_networks(): + # type: () -> Dict[str,List[str]] + + ## sadly, 18.04's iproute2 4.15.0-2ubun doesn't support the -j flag, + ## so we'll need to use a regex to parse 'ip' command output. + #out, _, _ = call_throws(['ip', '-j', 'route', 'ls']) + #j = json.loads(out) + #for x in j: + + out, _, _ = call_throws([find_executable('ip'), 'route', 'ls']) + return _parse_ip_route(out) + +def _parse_ip_route(out): + r = {} # type: Dict[str,List[str]] + p = re.compile(r'^(\S+) (.*)scope link (.*)src (\S+)') + for line in out.splitlines(): + m = p.findall(line) + if not m: + continue + net = m[0][0] + ip = m[0][3] + if net not in r: + r[net] = [] + r[net].append(ip) + return r + +def command_list_networks(): + # type: () -> None + r = list_networks() + print(json.dumps(r, indent=4)) + +################################## + def command_ls(): # type: () -> None ls = list_daemons(detail=not args.no_detail, @@ -3247,9 +3318,9 @@ class CustomValidation(argparse.Action): ################################## def get_distro(): - id_ = None - version = None - codename = None + distro = None + distro_version = None + distro_codename = None with open('/etc/os-release', 'r') as f: for line in f.readlines(): line = line.strip() @@ -3259,20 +3330,21 @@ def get_distro(): if val[0] == '"' and val[-1] == '"': val = val[1:-1] if var == 'ID': - id_ = val.lower() + distro = val.lower() elif var == 'VERSION_ID': - version = val.lower() + distro_version = val.lower() elif var == 'VERSION_CODENAME': - codename = val.lower() - return id_, version, codename + distro_codename = val.lower() + return distro, distro_version, distro_codename class Packager(object): - def __init__(self, stable=None, branch=None, commit=None): + def __init__(self, stable=None, version=None, branch=None, commit=None): assert \ (stable and not branch and not commit) or \ - (not stable and branch) or \ - (not stable and not branch and not commit) + (not stable and not version and branch) or \ + (not stable and not version and not branch and not commit) self.stable = stable + self.version = version self.branch = branch self.commit = commit @@ -3282,13 +3354,13 @@ class Packager(object): def rm_repo(self): raise NotImplementedError - def query_shaman(self, distro, version, branch, commit): 
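The new ``list_networks()`` and ``_parse_ip_route()`` helpers above are what let bootstrap set ``mon public_network`` automatically unless ``--skip-mon-network`` is passed: ``ip route ls`` output is reduced to a map from link-scope destinations to their local source addresses, and the CIDR whose source addresses include the bootstrap mon IP becomes the public network. A minimal standalone sketch of that flow (the sample route line and device name are invented)::

    import re

    def parse_ip_route(out):
        # Keep only 'scope link ... src ...' routes and map each destination
        # (usually a CIDR) to the local source addresses configured on it,
        # the same strategy as the helper added above.
        routes = {}
        pattern = re.compile(r'^(\S+) (.*)scope link (.*)src (\S+)')
        for line in out.splitlines():
            match = pattern.findall(line)
            if not match:
                continue
            net, ip = match[0][0], match[0][3]
            routes.setdefault(net, []).append(ip)
        return routes

    def infer_mon_network(routes, mon_ip):
        # Mirrors the bootstrap logic: pick the CIDR whose local source
        # addresses include the mon IP.
        for net, ips in routes.items():
            if mon_ip in ips:
                return net
        return None

    sample = '192.168.178.0/24 dev eth0 proto kernel scope link src 192.168.178.28 metric 100'
    routes = parse_ip_route(sample)
    print(routes)                                       # {'192.168.178.0/24': ['192.168.178.28']}
    print(infer_mon_network(routes, '192.168.178.28'))  # 192.168.178.0/24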
+ def query_shaman(self, distro, distro_version, branch, commit): # query shaman logging.info('Fetching repo metadata from shaman and chacra...') - shaman_url = 'https://shaman.ceph.com/api/repos/ceph/{version}/{sha1}/{distro}/{distro_version}/repo/?arch={arch}'.format( + shaman_url = 'https://shaman.ceph.com/api/repos/ceph/{branch}/{sha1}/{distro}/{distro_version}/repo/?arch={arch}'.format( distro=distro, - distro_version=version, - version=branch, + distro_version=distro_version, + branch=branch, sha1=commit or 'latest', arch=get_arch() ) @@ -3326,11 +3398,12 @@ class Apt(Packager): 'debian': 'debian', } - def __init__(self, stable, branch, commit, - distro, version, codename): - super(Apt, self).__init__(stable=stable, branch=branch, commit=commit) + def __init__(self, stable, version, branch, commit, + distro, distro_version, distro_codename): + super(Apt, self).__init__(stable=stable, version=version, + branch=branch, commit=commit) self.distro = self.DISTRO_NAMES[distro] - self.codename = codename + self.distro_codename = distro_codename def repo_path(self): return '/etc/apt/sources.list.d/ceph.list' @@ -3349,10 +3422,15 @@ class Apt(Packager): f.write(key) if self.stable: - content = 'deb %s/debian-%s/ %s main\n' % ( - args.repo_url, self.stable, self.codename) + if self.version: + content = 'deb %s/debian-%s-%s/ %s main\n' % ( + args.repo_url, self.stable, self.version, + self.distro_codename) + else: + content = 'deb %s/debian-%s/ %s main\n' % ( + args.repo_url, self.stable, self.distro_codename) else: - content = self.query_shaman(self.distro, self.codename, self.branch, + content = self.query_shaman(self.distro, self.distro_codename, self.branch, self.commit) logging.info('Installing repo file at %s...' % self.repo_path()) @@ -3395,10 +3473,11 @@ class YumDnf(Packager): 'fedora': ('fedora', 'fc'), } - def __init__(self, stable, branch, commit, - distro, version): - super(YumDnf, self).__init__(stable=stable, branch=branch, commit=commit) - self.major = int(version.split('.')[0]) + def __init__(self, stable, version, branch, commit, + distro, distro_version): + super(YumDnf, self).__init__(stable=stable, version=version, + branch=branch, commit=commit) + self.major = int(distro_version.split('.')[0]) self.distro_normalized = self.DISTRO_NAMES[distro][0] self.distro_code = self.DISTRO_NAMES[distro][1] + str(self.major) if (self.distro_code == 'fc' and self.major >= 30) or \ @@ -3468,7 +3547,13 @@ class YumDnf(Packager): def repo_baseurl(self): assert self.stable - return '%s/rpm-%s/%s' % (args.repo_url, self.stable, self.distro_code) + if self.version: + return '%s/rpm-%s-%s/%s' % (args.repo_url, self.stable, + self.version, + self.distro_code) + else: + return '%s/rpm-%s/%s' % (args.repo_url, self.stable, + self.distro_code) def add_repo(self): if self.stable: @@ -3525,14 +3610,15 @@ class Zypper(Packager): 'opensuse-leap' ] - def __init__(self, stable, branch, commit, - distro, version): - super(Zypper, self).__init__(stable=stable, branch=branch, commit=commit) + def __init__(self, stable, version, branch, commit, + distro, distro_version): + super(Zypper, self).__init__(stable=stable, version=version, + branch=branch, commit=commit) self.tool = 'zypper' self.distro = 'opensuse' - self.version = '15.1' - if 'tumbleweed' not in distro and version is not None: - self.version = version + self.distro_version = '15.1' + if 'tumbleweed' not in distro and distro_version is not None: + self.distro_version = distro_version def custom_repo(self, **kw): """ @@ -3568,6 +3654,9 @@ class 
Zypper(Packager): def repo_baseurl(self): assert self.stable + if self.version: + return '%s/rpm-%s-%s/%s' % (args.repo_url, self.stable, + self.version, self.distro) return '%s/rpm-%s/%s' % (args.repo_url, self.stable, self.distro) def add_repo(self): @@ -3587,7 +3676,7 @@ class Zypper(Packager): ) content += '\n\n' else: - content = self.query_shaman(self.distro, self.version, + content = self.query_shaman(self.distro, self.distro_version, self.branch, self.commit) @@ -3607,22 +3696,49 @@ class Zypper(Packager): self.install(['podman']) -def create_packager(stable=None, branch=None, commit=None): - distro, version, codename = get_distro() +def create_packager(stable=None, version=None, branch=None, commit=None): + distro, distro_version, distro_codename = get_distro() if distro in YumDnf.DISTRO_NAMES: - return YumDnf(stable=stable, branch=branch, commit=commit, - distro=distro, version=version) + return YumDnf(stable=stable, version=version, + branch=branch, commit=commit, + distro=distro, distro_version=distro_version) elif distro in Apt.DISTRO_NAMES: - return Apt(stable=stable, branch=branch, commit=commit, - distro=distro, version=version, codename=codename) + return Apt(stable=stable, version=version, + branch=branch, commit=commit, + distro=distro, distro_version=distro_version, + distro_codename=distro_codename) elif distro in Zypper.DISTRO_NAMES: - return Zypper(stable=stable, branch=branch, commit=commit, - distro=distro, version=version) - raise Error('Distro %s version %s not supported' % (distro, version)) + return Zypper(stable=stable, version=version, + branch=branch, commit=commit, + distro=distro, distro_version=distro_version) + raise Error('Distro %s version %s not supported' % (distro, distro_version)) def command_add_repo(): - pkg = create_packager(stable=args.release, branch=args.dev, + if args.version and args.release: + raise Error('you can specify either --release or --version but not both') + if args.version: + try: + (x, y, z) = args.version.split('.') + except Exception as e: + raise Error('version must be in the form x.y.z (e.g., 15.2.0)') + relnames = { + '16': 'pacific', + '15': 'octopus', + '14': 'nautilus', + '13': 'mimic', + '12': 'luminous', + '11': 'kraken', + '10': 'jewel', + } + args.release = relnames.get(x, None) + if not args.release: + raise Error('unknown release %s (not in %s)' % ( + x, ' '.join(relnames.values()))) + + pkg = create_packager(stable=args.release, + version=args.version, + branch=args.dev, commit=args.dev_commit) pkg.add_repo() @@ -3706,6 +3822,10 @@ def _get_parser(): default='/', help='base directory for legacy daemon data') + parser_list_networks = subparsers.add_parser( + 'list-networks', help='list IP networks') + parser_list_networks.set_defaults(func=command_list_networks) + parser_adopt = subparsers.add_parser( 'adopt', help='adopt daemon deployed with a different tool') parser_adopt.set_defaults(func=command_adopt) @@ -3916,6 +4036,10 @@ def _get_parser(): '--dashboard-crt', help='Dashboard certificate') + parser_bootstrap.add_argument( + '--skip-mon-network', + action='store_true', + help='set mon public_network based on bootstrap mon ip') parser_bootstrap.add_argument( '--skip-dashboard', action='store_true', @@ -3956,6 +4080,10 @@ def _get_parser(): '--orphan-initial-daemons', action='store_true', help='Do not create initial mon, mgr, and crash service specs') + parser_bootstrap.add_argument( + '--skip-monitoring-stack', + action='store_true', + help='Do not automatically provision monitoring stack (prometheus, grafana, 
alertmanager, node-exporter)') parser_deploy = subparsers.add_parser( 'deploy', help='deploy a daemon') @@ -3992,6 +4120,10 @@ def _get_parser(): '--reconfig', action='store_true', help='Reconfigure a previously deployed daemon') + parser_deploy.add_argument( + '--allow-ptrace', + action='store_true', + help='Allow SYS_PTRACE on daemon container') parser_check_host = subparsers.add_parser( 'check-host', help='check host configuration') @@ -4012,7 +4144,10 @@ def _get_parser(): parser_add_repo.set_defaults(func=command_add_repo) parser_add_repo.add_argument( '--release', - help='use specified upstream release') + help='use latest version of a named release (e.g., octopus)') + parser_add_repo.add_argument( + '--version', + help='use specific upstream version (x.y.z)') parser_add_repo.add_argument( '--dev', help='use specified bleeding edge build from git branch or tag') @@ -4024,7 +4159,7 @@ def _get_parser(): help='specify alternative GPG key location') parser_add_repo.add_argument( '--repo-url', - default='https://download.ceph.com/', + default='https://download.ceph.com', help='specify alternative repo location') # TODO: proxy? diff --git a/src/cephadm/tests/test_cephadm.py b/src/cephadm/tests/test_cephadm.py index b0dad3cd42d67..311abbe39583e 100644 --- a/src/cephadm/tests/test_cephadm.py +++ b/src/cephadm/tests/test_cephadm.py @@ -43,3 +43,56 @@ def test_parse_podman_version_invalid(self): with pytest.raises(ValueError) as res: cd._parse_podman_version('podman version inval.id') assert 'inval' in str(res.value) + + @pytest.mark.parametrize("test_input, expected", [ + ( +""" +default via 192.168.178.1 dev enxd89ef3f34260 proto dhcp metric 100 +10.0.0.0/8 via 10.4.0.1 dev tun0 proto static metric 50 +10.3.0.0/21 via 10.4.0.1 dev tun0 proto static metric 50 +10.4.0.1 dev tun0 proto kernel scope link src 10.4.0.2 metric 50 +137.1.0.0/16 via 10.4.0.1 dev tun0 proto static metric 50 +138.1.0.0/16 via 10.4.0.1 dev tun0 proto static metric 50 +139.1.0.0/16 via 10.4.0.1 dev tun0 proto static metric 50 +140.1.0.0/17 via 10.4.0.1 dev tun0 proto static metric 50 +141.1.0.0/16 via 10.4.0.1 dev tun0 proto static metric 50 +169.254.0.0/16 dev docker0 scope link metric 1000 +172.17.0.0/16 dev docker0 proto kernel scope link src 172.17.0.1 +192.168.39.0/24 dev virbr1 proto kernel scope link src 192.168.39.1 linkdown +192.168.122.0/24 dev virbr0 proto kernel scope link src 192.168.122.1 linkdown +192.168.178.0/24 dev enxd89ef3f34260 proto kernel scope link src 192.168.178.28 metric 100 +192.168.178.1 dev enxd89ef3f34260 proto static scope link metric 100 +195.135.221.12 via 192.168.178.1 dev enxd89ef3f34260 proto static metric 100 +""", + { + '10.4.0.1': ['10.4.0.2'], + '172.17.0.0/16': ['172.17.0.1'], + '192.168.39.0/24': ['192.168.39.1'], + '192.168.122.0/24': ['192.168.122.1'], + '192.168.178.0/24': ['192.168.178.28'] + } + ), ( +""" +default via 10.3.64.1 dev eno1 proto static metric 100 +10.3.64.0/24 dev eno1 proto kernel scope link src 10.3.64.23 metric 100 +10.3.64.0/24 dev eno1 proto kernel scope link src 10.3.64.27 metric 100 +10.88.0.0/16 dev cni-podman0 proto kernel scope link src 10.88.0.1 linkdown +172.21.0.0/20 via 172.21.3.189 dev tun0 +172.21.1.0/20 via 172.21.3.189 dev tun0 +172.21.2.1 via 172.21.3.189 dev tun0 +172.21.3.1 dev tun0 proto kernel scope link src 172.21.3.2 +172.21.4.0/24 via 172.21.3.1 dev tun0 +172.21.5.0/24 via 172.21.3.1 dev tun0 +172.21.6.0/24 via 172.21.3.1 dev tun0 +172.21.7.0/24 via 172.21.3.1 dev tun0 +192.168.122.0/24 dev virbr0 proto kernel scope link src 
192.168.122.1 linkdown +""", + { + '10.3.64.0/24': ['10.3.64.23', '10.3.64.27'], + '10.88.0.0/16': ['10.88.0.1'], + '172.21.3.1': ['172.21.3.2'], + '192.168.122.0/24': ['192.168.122.1']} + ), + ]) + def test_parse_ip_route(self, test_input, expected): + assert cd._parse_ip_route(test_input) == expected diff --git a/src/common/options.cc b/src/common/options.cc index a9162ebe20e13..50d123f9085bc 100644 --- a/src/common/options.cc +++ b/src/common/options.cc @@ -439,7 +439,8 @@ std::vector