diff --git a/README.md b/README.md index 0e8b2d9c89..ed6ae1b529 100755 --- a/README.md +++ b/README.md @@ -318,6 +318,7 @@ The [build](docs/source/build.md) instructions contain more details on building The folder structure mirrors other RAPIDS repos, with the following folders: +- `bench/ann`: Python scripts for running ANN benchmarks - `ci`: Scripts for running CI in PRs - `conda`: Conda recipes and development conda environments - `cpp`: Source code for C++ libraries. diff --git a/bench/ann/data_export.py b/bench/ann/data_export.py index df48882840..9410cfe773 100644 --- a/bench/ann/data_export.py +++ b/bench/ann/data_export.py @@ -19,7 +19,7 @@ def export_results(output_filepath, recompute, groundtruth_filepath, - result_filepaths): + result_filepath): print(f"Writing output file to: {output_filepath}") ann_bench_scripts_dir = os.path.join(os.getenv("RAFT_HOME"), "cpp/bench/ann/scripts") @@ -27,10 +27,10 @@ def export_results(output_filepath, recompute, groundtruth_filepath, "eval.pl") if recompute: p = subprocess.Popen([ann_bench_scripts_path, "-f", "-o", output_filepath, - groundtruth_filepath] + result_filepaths) + groundtruth_filepath, result_filepath]) else: p = subprocess.Popen([ann_bench_scripts_path, "-o", output_filepath, - groundtruth_filepath] + result_filepaths) + groundtruth_filepath, result_filepath]) p.wait() @@ -51,16 +51,13 @@ def main(): "bench", "ann", "data") ) - args, result_filepaths = parser.parse_known_args() - - # if nothing is provided - if len(result_filepaths) == 0: - raise ValueError("No filepaths to results were provided") + args = parser.parse_args() + result_filepath = os.path.join(args.dataset_path, args.dataset, "result") groundtruth_filepath = os.path.join(args.dataset_path, args.dataset, "groundtruth.neighbors.ibin") export_results(args.output, args.recompute, groundtruth_filepath, - result_filepaths) + result_filepath) if __name__ == "__main__": diff --git a/bench/ann/run.py b/bench/ann/run.py index ebaef1e004..d8e33f1113 100644 --- a/bench/ann/run.py +++ b/bench/ann/run.py @@ -146,8 +146,14 @@ def main(): conf_file = json.load(f) # Replace base, query to dataset-path - conf_file["dataset"]["base_file"] = os.path.join(dataset_path, "base.fbin") - conf_file["dataset"]["query_file"] = os.path.join(dataset_path, "query.fbin") + replacement_base_filepath = \ + os.path.normpath(conf_file["dataset"]["base_file"]).split(os.path.sep)[-1] + conf_file["dataset"]["base_file"] = \ + os.path.join(dataset_path, replacement_base_filepath) + replacement_query_filepath = \ + os.path.normpath(conf_file["dataset"]["query_file"]).split(os.path.sep)[-1] + conf_file["dataset"]["query_file"] = \ + os.path.join(dataset_path, replacement_query_filepath) # Ensure base and query files exist for dataset if not os.path.exists(conf_file["dataset"]["base_file"]): raise FileNotFoundError(conf_file["dataset"]["base_file"]) diff --git a/ci/test_wheel_raft_dask.sh b/ci/test_wheel_raft_dask.sh index 9f8e8ce02a..676d642de9 100755 --- a/ci/test_wheel_raft_dask.sh +++ b/ci/test_wheel_raft_dask.sh @@ -12,7 +12,7 @@ RAPIDS_PY_WHEEL_NAME="pylibraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels python -m pip install --no-deps ./local-pylibraft-dep/pylibraft*.whl # Always install latest dask for testing -python -m pip install git+https://github.com/dask/dask.git@2023.7.1 git+https://github.com/dask/distributed.git@2023.7.1 git+https://github.com/rapidsai/dask-cuda.git@branch-23.10 +python -m pip install git+https://github.com/dask/dask.git@main 
git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.10 # echo to expand wildcard before adding `[extra]` requires for pip python -m pip install $(echo ./dist/raft_dask*.whl)[test] diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 223bafe70b..7e921decd5 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -19,10 +19,10 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-core==2023.7.1 +- dask-core>=2023.7.1 - dask-cuda==23.10.* -- dask==2023.7.1 -- distributed==2023.7.1 +- dask>=2023.7.1 +- distributed>=2023.7.1 - doxygen>=1.8.20 - gcc_linux-64=11.* - gmock>=1.13.0 diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index e68feaad82..2ea685b529 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -19,10 +19,10 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-core==2023.7.1 +- dask-core>=2023.7.1 - dask-cuda==23.10.* -- dask==2023.7.1 -- distributed==2023.7.1 +- dask>=2023.7.1 +- distributed>=2023.7.1 - doxygen>=1.8.20 - gcc_linux-64=11.* - gmock>=1.13.0 diff --git a/conda/recipes/raft-dask/meta.yaml b/conda/recipes/raft-dask/meta.yaml index cf1f8488bc..c9caa4dd9b 100644 --- a/conda/recipes/raft-dask/meta.yaml +++ b/conda/recipes/raft-dask/meta.yaml @@ -60,10 +60,10 @@ requirements: - cudatoolkit {% endif %} - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} - - dask ==2023.7.1 - - dask-core ==2023.7.1 + - dask >=2023.7.1 + - dask-core >=2023.7.1 - dask-cuda ={{ minor_version }} - - distributed ==2023.7.1 + - distributed >=2023.7.1 - joblib >=0.11 - nccl >=2.9.9 - pylibraft {{ version }} diff --git a/cpp/include/raft/neighbors/cagra_serialize.cuh b/cpp/include/raft/neighbors/cagra_serialize.cuh index 2242629409..0a806402d2 100644 --- a/cpp/include/raft/neighbors/cagra_serialize.cuh +++ b/cpp/include/raft/neighbors/cagra_serialize.cuh @@ -47,12 +47,16 @@ namespace raft::neighbors::cagra { * @param[in] handle the raft handle * @param[in] os output stream * @param[in] index CAGRA index + * @param[in] include_dataset Whether or not to write out the dataset to the file. 
* */ template -void serialize(raft::resources const& handle, std::ostream& os, const index& index) +void serialize(raft::resources const& handle, + std::ostream& os, + const index& index, + bool include_dataset = true) { - detail::serialize(handle, os, index); + detail::serialize(handle, os, index, include_dataset); } /** @@ -77,14 +81,16 @@ void serialize(raft::resources const& handle, std::ostream& os, const index void serialize(raft::resources const& handle, const std::string& filename, - const index& index) + const index& index, + bool include_dataset = true) { - detail::serialize(handle, filename, index); + detail::serialize(handle, filename, index, include_dataset); } /** @@ -158,4 +164,4 @@ namespace raft::neighbors::experimental::cagra { using raft::neighbors::cagra::deserialize; using raft::neighbors::cagra::serialize; -} // namespace raft::neighbors::experimental::cagra \ No newline at end of file +} // namespace raft::neighbors::experimental::cagra diff --git a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh index 8d040c352b..2c9cbd2563 100644 --- a/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh +++ b/cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh @@ -24,8 +24,7 @@ namespace raft::neighbors::cagra::detail { -// Serialization version 1. -constexpr int serialization_version = 2; +constexpr int serialization_version = 3; // NB: we wrap this check in a struct, so that the updated RealSize is easy to see in the error // message. @@ -50,41 +49,53 @@ template struct check_index_layout), expecte * */ template -void serialize(raft::resources const& res, std::ostream& os, const index& index_) +void serialize(raft::resources const& res, + std::ostream& os, + const index& index_, + bool include_dataset) { RAFT_LOG_DEBUG( "Saving CAGRA index, size %zu, dim %u", static_cast(index_.size()), index_.dim()); + std::string dtype_string = raft::detail::numpy_serializer::get_numpy_dtype().to_string(); + dtype_string.resize(4); + os << dtype_string; + serialize_scalar(res, os, serialization_version); serialize_scalar(res, os, index_.size()); serialize_scalar(res, os, index_.dim()); serialize_scalar(res, os, index_.graph_degree()); serialize_scalar(res, os, index_.metric()); - auto dataset = index_.dataset(); - // Remove padding before saving the dataset - auto host_dataset = make_host_matrix(dataset.extent(0), dataset.extent(1)); - RAFT_CUDA_TRY(cudaMemcpy2DAsync(host_dataset.data_handle(), - sizeof(T) * host_dataset.extent(1), - dataset.data_handle(), - sizeof(T) * dataset.stride(0), - sizeof(T) * host_dataset.extent(1), - dataset.extent(0), - cudaMemcpyDefault, - resource::get_cuda_stream(res))); - resource::sync_stream(res); - serialize_mdspan(res, os, host_dataset.view()); serialize_mdspan(res, os, index_.graph()); + + serialize_scalar(res, os, include_dataset); + if (include_dataset) { + auto dataset = index_.dataset(); + // Remove padding before saving the dataset + auto host_dataset = make_host_matrix(dataset.extent(0), dataset.extent(1)); + RAFT_CUDA_TRY(cudaMemcpy2DAsync(host_dataset.data_handle(), + sizeof(T) * host_dataset.extent(1), + dataset.data_handle(), + sizeof(T) * dataset.stride(0), + sizeof(T) * host_dataset.extent(1), + dataset.extent(0), + cudaMemcpyDefault, + resource::get_cuda_stream(res))); + resource::sync_stream(res); + serialize_mdspan(res, os, host_dataset.view()); + } } template void serialize(raft::resources const& res, const std::string& filename, - const index& index_) 
+ const index& index_, + bool include_dataset) { std::ofstream of(filename, std::ios::out | std::ios::binary); if (!of) { RAFT_FAIL("Cannot open file %s", filename.c_str()); } - detail::serialize(res, of, index_); + detail::serialize(res, of, index_, include_dataset); of.close(); if (!of) { RAFT_FAIL("Error writing output %s", filename.c_str()); } @@ -102,6 +113,9 @@ void serialize(raft::resources const& res, template auto deserialize(raft::resources const& res, std::istream& is) -> index { + char dtype_string[4]; + is.read(dtype_string, 4); + auto ver = deserialize_scalar(res, is); if (ver != serialization_version) { RAFT_FAIL("serialization version mismatch, expected %d, got %d ", serialization_version, ver); @@ -113,9 +127,11 @@ auto deserialize(raft::resources const& res, std::istream& is) -> index auto dataset = raft::make_host_matrix(n_rows, dim); auto graph = raft::make_host_matrix(n_rows, graph_degree); - deserialize_mdspan(res, is, dataset.view()); deserialize_mdspan(res, is, graph.view()); + bool has_dataset = deserialize_scalar(res, is); + if (has_dataset) { deserialize_mdspan(res, is, dataset.view()); } + return index( res, metric, raft::make_const_mdspan(dataset.view()), raft::make_const_mdspan(graph.view())); } diff --git a/cpp/include/raft/neighbors/detail/nn_descent.cuh b/cpp/include/raft/neighbors/detail/nn_descent.cuh index 74d27daac1..6ef651d37f 100644 --- a/cpp/include/raft/neighbors/detail/nn_descent.cuh +++ b/cpp/include/raft/neighbors/detail/nn_descent.cuh @@ -1206,13 +1206,12 @@ void GNND::build(Data_t* data, Index_t* output_graph, cudaStream_t stream) { - cudaStreamSynchronize(stream); nrow_ = nrow; graph_.h_graph = (InternalID_t*)output_graph; cudaPointerAttributes data_ptr_attr; RAFT_CUDA_TRY(cudaPointerGetAttributes(&data_ptr_attr, data)); if (data_ptr_attr.type == cudaMemoryTypeUnregistered) { typename std::remove_const::type* input_data; size_t batch_size = 100000; RAFT_CUDA_TRY(cudaMallocAsync( @@ -1378,7 +1378,7 @@ index build(raft::resources const& res, const index_params& params, mdspan, row_major, Accessor> dataset) { - RAFT_EXPECTS(dataset.size() < std::numeric_limits::max() - 1, + RAFT_EXPECTS(dataset.extent(0) < std::numeric_limits::max() - 1, "The dataset size for GNND should be less than %d", std::numeric_limits::max() - 1); size_t intermediate_degree = params.intermediate_graph_degree; @@ -1415,9 +1415,9 @@ index build(raft::resources const& res, .termination_threshold = params.termination_threshold, .metric_type = Metric_t::METRIC_L2}; - GNND nnd(build_config); std::cout << "Intermediate graph dim: " << int_graph.extent(0) << ", " << int_graph.extent(1) << std::endl; + GNND nnd(build_config); nnd.build(dataset.data_handle(), dataset.extent(0), int_graph.data_handle(), diff --git a/cpp/include/raft_runtime/neighbors/cagra.hpp b/cpp/include/raft_runtime/neighbors/cagra.hpp index 6f56302776..c54ed32b77 100644 --- a/cpp/include/raft_runtime/neighbors/cagra.hpp +++ b/cpp/include/raft_runtime/neighbors/cagra.hpp @@ -56,14 +56,16 @@ namespace raft::runtime::neighbors::cagra { raft::device_matrix_view distances); \ void serialize_file(raft::resources const& handle, \ const std::string& filename, \ - const raft::neighbors::cagra::index& index); \ + const raft::neighbors::cagra::index& index, \ + bool include_dataset = true); \ \ void deserialize_file(raft::resources const& handle, \ const std::string& filename, \ raft::neighbors::cagra::index* index); \ void serialize(raft::resources const& handle, \
std::string& str, \ - const raft::neighbors::cagra::index& index); \ + const raft::neighbors::cagra::index& index, \ + bool include_dataset = true); \ \ void deserialize(raft::resources const& handle, \ const std::string& str, \ diff --git a/cpp/src/raft_runtime/neighbors/cagra_serialize.cu b/cpp/src/raft_runtime/neighbors/cagra_serialize.cu index be9788562a..69b48b93a4 100644 --- a/cpp/src/raft_runtime/neighbors/cagra_serialize.cu +++ b/cpp/src/raft_runtime/neighbors/cagra_serialize.cu @@ -27,9 +27,10 @@ namespace raft::runtime::neighbors::cagra { #define RAFT_INST_CAGRA_SERIALIZE(DTYPE) \ void serialize_file(raft::resources const& handle, \ const std::string& filename, \ - const raft::neighbors::cagra::index& index) \ + const raft::neighbors::cagra::index& index, \ + bool include_dataset) \ { \ - raft::neighbors::cagra::serialize(handle, filename, index); \ + raft::neighbors::cagra::serialize(handle, filename, index, include_dataset); \ }; \ \ void deserialize_file(raft::resources const& handle, \ @@ -41,10 +42,11 @@ namespace raft::runtime::neighbors::cagra { }; \ void serialize(raft::resources const& handle, \ std::string& str, \ - const raft::neighbors::cagra::index& index) \ + const raft::neighbors::cagra::index& index, \ + bool include_dataset) \ { \ std::stringstream os; \ - raft::neighbors::cagra::serialize(handle, os, index); \ + raft::neighbors::cagra::serialize(handle, os, index, include_dataset); \ str = os.str(); \ } \ \ diff --git a/cpp/test/neighbors/ann_cagra.cuh b/cpp/test/neighbors/ann_cagra.cuh index 1f82d6771b..734599057b 100644 --- a/cpp/test/neighbors/ann_cagra.cuh +++ b/cpp/test/neighbors/ann_cagra.cuh @@ -138,6 +138,7 @@ struct AnnCagraInputs { int search_width; raft::distance::DistanceType metric; bool host_dataset; + bool include_serialized_dataset; // std::optional double min_recall; // = std::nullopt; }; @@ -220,9 +221,11 @@ class AnnCagraTest : public ::testing::TestWithParam { } else { index = cagra::build(handle_, index_params, database_view); }; - cagra::serialize(handle_, "cagra_index", index); + cagra::serialize(handle_, "cagra_index", index, ps.include_serialized_dataset); } + auto index = cagra::deserialize(handle_, "cagra_index"); + if (!ps.include_serialized_dataset) { index.update_dataset(handle_, database_view); } auto search_queries_view = raft::make_device_matrix_view( search_queries.data(), ps.n_queries, ps.dim); @@ -361,9 +364,7 @@ class AnnCagraSortTest : public ::testing::TestWithParam { void SetUp() override { - std::cout << "Resizing database: " << ps.n_rows * ps.dim << std::endl; database.resize(((size_t)ps.n_rows) * ps.dim, handle_.get_stream()); - std::cout << "Done.\nRuning rng" << std::endl; raft::random::Rng r(1234ULL); if constexpr (std::is_same{}) { GenerateRoundingErrorFreeDataset(database.data(), ps.n_rows, ps.dim, r, handle_.get_stream()); @@ -401,6 +402,7 @@ inline std::vector generate_inputs() {1}, {raft::distance::DistanceType::L2Expanded}, {false}, + {true}, {0.995}); auto inputs2 = raft::util::itertools::product( @@ -416,6 +418,7 @@ inline std::vector generate_inputs() {1}, {raft::distance::DistanceType::L2Expanded}, {false}, + {true}, {0.995}); inputs.insert(inputs.end(), inputs2.begin(), inputs2.end()); inputs2 = raft::util::itertools::product( @@ -431,6 +434,7 @@ inline std::vector generate_inputs() {1}, {raft::distance::DistanceType::L2Expanded}, {false}, + {false}, {0.995}); inputs.insert(inputs.end(), inputs2.begin(), inputs2.end()); @@ -447,6 +451,7 @@ inline std::vector generate_inputs() {1}, 
{raft::distance::DistanceType::L2Expanded}, {false}, + {true}, {0.995}); inputs.insert(inputs.end(), inputs2.begin(), inputs2.end()); @@ -463,6 +468,7 @@ inline std::vector generate_inputs() {1}, {raft::distance::DistanceType::L2Expanded}, {false, true}, + {false}, {0.995}); inputs.insert(inputs.end(), inputs2.begin(), inputs2.end()); diff --git a/dependencies.yaml b/dependencies.yaml index ee04c886d7..cf8170b9a1 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -305,16 +305,16 @@ dependencies: common: - output_types: [conda, pyproject] packages: - - dask==2023.7.1 + - dask>=2023.7.1 - dask-cuda==23.10.* - - distributed==2023.7.1 + - distributed>=2023.7.1 - joblib>=0.11 - numba>=0.57 - *numpy - ucx-py==0.34.* - output_types: conda packages: - - dask-core==2023.7.1 + - dask-core>=2023.7.1 - ucx>=1.13.0 - ucx-proc=*=gpu - output_types: pyproject diff --git a/docs/source/raft_ann_benchmarks.md b/docs/source/raft_ann_benchmarks.md index 88dac9fabd..2985a8120b 100644 --- a/docs/source/raft_ann_benchmarks.md +++ b/docs/source/raft_ann_benchmarks.md @@ -6,13 +6,13 @@ This project provides a benchmark program for various ANN search implementations The easiest way to install these benchmarks is through conda. We suggest using mamba as it generally leads to a faster install time: ```bash +git clone https://github.com/rapidsai/raft.git && cd raft +export RAFT_HOME=$(pwd) + mamba env create --name raft_ann_benchmarks -f conda/environments/bench_ann_cuda-118_arch-x86_64.yaml conda activate raft_ann_benchmarks mamba install -c rapidsai -c conda-forge -c nvidia libraft libraft-ann-bench cudatoolkit=11.8* - -git clone https://github.com/rapidsai/raft.git && cd raft -export RAFT_HOME=$(pwd) ``` The channel `rapidsai` can easily be substituted with `rapidsai-nightly` if nightly benchmarks are desired. @@ -35,50 +35,50 @@ expected to be defined to run these scripts; this variable holds the directory w ### End-to-end example: Million-scale ```bash export RAFT_HOME=$(pwd) -# All scripts are present in directory raft/scripts/ann-benchmarks +# All scripts are present in directory raft/bench/ann # (1) prepare dataset -python scripts/ann-benchmarks/get_dataset.py --dataset glove-100-angular --normalize +python bench/ann/get_dataset.py --dataset glove-100-angular --normalize # (2) build and search index -python scripts/ann-benchmarks/run.py --configuration bench/ann/conf/glove-100-inner.json +python bench/ann/run.py --dataset glove-100-inner # (3) evaluate results -python scripts/ann-benchmarks/data_export.py --output out.csv --dataset glove-100-inner result/glove-100-inner/ +python bench/ann/data_export.py --output out.csv --dataset glove-100-inner # (4) plot results -python scripts/ann-benchmarks/plot.py --result-csv out.csv +python bench/ann/plot.py --result-csv out.csv ``` ### End-to-end example: Billion-scale -`scripts/get_dataset.py` cannot be used to download the [billion-scale datasets](ann_benchmarks_dataset.md#billion-scale) +`bench/ann/get_dataset.py` cannot be used to download the [billion-scale datasets](ann_benchmarks_dataset.md#billion-scale) because they are so large. You should instead use our billion-scale datasets guide to download and prepare them. -All other python scripts mentioned below work as intended once the +All other Python scripts mentioned below work as intended once the billion-scale dataset has been downloaded.
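These scripts exchange vectors in the binary `.fbin`/`.ibin` formats used by big-ann-benchmarks. If a download looks suspect, the file header can be checked with a few lines of Python — a minimal sketch, assuming the standard layout of two little-endian `int32` values (row count, then dimension) followed by the flat vector data; `bin_header` is just an illustrative helper, not part of the benchmark scripts:

```python
import numpy as np

def bin_header(path):
    # The first 8 bytes of a big-ann-benchmarks .fbin/.ibin file hold the
    # number of vectors and their dimensionality as little-endian int32.
    with open(path, "rb") as f:
        n_rows, dim = np.fromfile(f, dtype="<i4", count=2)
    return int(n_rows), int(dim)

# For example, after preparing the ground truth files in step (1) below:
# print(bin_header("data/deep-1B/groundtruth.neighbors.ibin"))
```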
To download Billion-scale datasets, visit [big-ann-benchmarks](http://big-ann-benchmarks.com/neurips21.html) ```bash export RAFT_HOME=$(pwd) -# All scripts are present in directory raft/scripts/ann-benchmarks +# All scripts are present in directory raft/bench/ann mkdir -p data/deep-1B # (1) prepare dataset # download manually "Ground Truth" file of "Yandex DEEP" # suppose the file name is deep_new_groundtruth.public.10K.bin -python scripts/ann-benchmarks/split_groundtruth.py --groundtruth data/deep-1B/deep_new_groundtruth.public.10K.bin +python bench/ann/split_groundtruth.py --groundtruth data/deep-1B/deep_new_groundtruth.public.10K.bin # two files 'groundtruth.neighbors.ibin' and 'groundtruth.distances.fbin' should be produced # (2) build and search index -python scripts/ann-benchmarks/run.py --configuration bench/ann/conf/deep-1B.json +python bench/ann/run.py --dataset deep-1B # (3) evaluate results -python scripts/ann-benchmarks/data_export.py --output out.csv --dataset deep-1B result/deep-1B/ +python bench/ann/data_export.py --output out.csv --dataset deep-1B # (4) plot results -python scripts/ann-benchmarks/plot.py --result-csv out.csv +python bench/ann/plot.py --result-csv out.csv ``` -The usage of `scripts/ann-benchmarks/split-groundtruth.py` is: +The usage of `bench/ann/split_groundtruth.py` is: ```bash usage: split_groundtruth.py [-h] --groundtruth GROUNDTRUTH @@ -89,7 +89,7 @@ options: ``` ##### Step 1: Prepare Dataset -The script `scripts/ann-benchmarks/get_dataset.py` will download and unpack the dataset in directory +The script `bench/ann/get_dataset.py` will download and unpack the dataset in the directory that the user provides. As of now, only million-scale datasets are supported by this script. For more information, see [datasets and formats](ann_benchmarks_dataset.md). @@ -107,9 +107,10 @@ options: When option `normalize` is provided to the script, any dataset that has cosine distances will be normalized to inner product. So, for example, the dataset `glove-100-angular` will be written at location `${RAFT_HOME}/bench/ann/data/glove-100-inner/`. +``` #### Step 2: Build and Search Index -The script `scripts/ann-benchmarks/run.py` will build and search indices for a given dataset and its +The script `bench/ann/run.py` will build and search indices for a given dataset and its specified configuration. To configure which algorithms are available, we use `algos.yaml`. To configure building/searching indices for a dataset, look at [index configuration](#json-index-config). @@ -123,7 +124,7 @@ raft_ivf_pq: available in `raft/cpp/build/`. `disabled` : denotes whether an algorithm should be excluded from benchmark runs. -The usage of the script `scripts/run.py` is: +The usage of the script `bench/ann/run.py` is: ```bash usage: run.py [-h] [--configuration CONFIGURATION] [--dataset DATASET] [--build] [--search] [--algorithms ALGORITHMS] [--indices INDICES] [-f] @@ -161,13 +162,13 @@ it is assumed both are `True`. is available in `algos.yaml` and not disabled, as well as having an associated executable. #### Step 3: Evaluating Results -The script `scripts/ann-benchmarks/data_export.py` will evaluate results for a dataset whose index has been built -and search with at least one algorithm. +The script `bench/ann/data_export.py` will evaluate results for a dataset whose index has been built +and searched with at least one algorithm.
For every result file that is available to the script, the output will be combined and written to a CSV file. The usage of this script is: ```bash -usage: data_export.py [-h] --output OUTPUT [--recompute] [--dataset DATASET] [--dataset-path DATASET_PATH] +usage: data_export.py [-h] --output OUTPUT [--recompute] [--dataset DATASET] [--dataset-path DATASET_PATH] options: -h, --help show this help message and exit @@ -178,10 +179,8 @@ options: path to dataset folder (default: ${RAFT_HOME}/bench/ann/data) ``` -`result-filepaths` : whitespace delimited list of result files/directories that can be captured via pattern match. For more [information and examples](ann_benchmarks_low_level.html#result-filepath-example) - #### Step 4: Plot Results -The script `scripts/ann-benchmarks/plot.py` will plot all results evaluated to a CSV file for a given dataset. +The script `bench/ann/plot.py` will plot all results that have been evaluated into a CSV file for a given dataset. The usage of this script is: ```bash diff --git a/python/pylibraft/pylibraft/common/mdspan.pxd b/python/pylibraft/pylibraft/common/mdspan.pxd index 6b202c2b69..17dd2d8bfd 100644 --- a/python/pylibraft/pylibraft/common/mdspan.pxd +++ b/python/pylibraft/pylibraft/common/mdspan.pxd @@ -30,6 +30,12 @@ from pylibraft.common.cpp.mdspan cimport ( from pylibraft.common.handle cimport device_resources from pylibraft.common.optional cimport make_optional, optional +# Cython doesn't like `const float` inside template parameters +# hack around this by using typedefs +ctypedef const float const_float +ctypedef const int8_t const_int8_t +ctypedef const uint8_t const_uint8_t + cdef device_matrix_view[float, int64_t, row_major] get_dmv_float( array, check_shape) except * @@ -49,6 +55,15 @@ cdef optional[device_matrix_view[int64_t, int64_t, row_major]] make_optional_vie cdef device_matrix_view[uint32_t, int64_t, row_major] get_dmv_uint32( array, check_shape) except * +cdef device_matrix_view[const_float, int64_t, row_major] get_const_dmv_float( + array, check_shape) except * + +cdef device_matrix_view[const_uint8_t, int64_t, row_major] get_const_dmv_uint8( + array, check_shape) except * + +cdef device_matrix_view[const_int8_t, int64_t, row_major] get_const_dmv_int8( + array, check_shape) except * + cdef host_matrix_view[float, int64_t, row_major] get_hmv_float( array, check_shape) except * @@ -63,3 +78,12 @@ cdef host_matrix_view[int64_t, int64_t, row_major] get_hmv_int64( cdef host_matrix_view[uint32_t, int64_t, row_major] get_hmv_uint32( array, check_shape) except * + +cdef host_matrix_view[const_float, int64_t, row_major] get_const_hmv_float( + array, check_shape) except * + +cdef host_matrix_view[const_uint8_t, int64_t, row_major] get_const_hmv_uint8( + array, check_shape) except * + +cdef host_matrix_view[const_int8_t, int64_t, row_major] get_const_hmv_int8( + array, check_shape) except * diff --git a/python/pylibraft/pylibraft/common/mdspan.pyx b/python/pylibraft/pylibraft/common/mdspan.pyx index 1219b1612d..7442a6bb89 100644 --- a/python/pylibraft/pylibraft/common/mdspan.pyx +++ b/python/pylibraft/pylibraft/common/mdspan.pyx @@ -193,6 +193,39 @@ cdef device_matrix_view[int64_t, int64_t, row_major] \ cai.data, shape[0], shape[1]) +cdef device_matrix_view[const_float, int64_t, row_major] \ + get_const_dmv_float(cai, check_shape) except *: + if cai.dtype != np.float32: + raise TypeError("dtype %s not supported" % cai.dtype) + if check_shape and len(cai.shape) != 2: + raise ValueError("Expected a 2D array, got %d D" % len(cai.shape)) + shape = (cai.shape[0],
cai.shape[1] if len(cai.shape) == 2 else 1) + return make_device_matrix_view[const_float, int64_t, row_major]( + cai.data, shape[0], shape[1]) + + +cdef device_matrix_view[const_uint8_t, int64_t, row_major] \ + get_const_dmv_uint8(cai, check_shape) except *: + if cai.dtype != np.uint8: + raise TypeError("dtype %s not supported" % cai.dtype) + if check_shape and len(cai.shape) != 2: + raise ValueError("Expected a 2D array, got %d D" % len(cai.shape)) + shape = (cai.shape[0], cai.shape[1] if len(cai.shape) == 2 else 1) + return make_device_matrix_view[const_uint8_t, int64_t, row_major]( + cai.data, shape[0], shape[1]) + + +cdef device_matrix_view[const_int8_t, int64_t, row_major] \ + get_const_dmv_int8(cai, check_shape) except *: + if cai.dtype != np.int8: + raise TypeError("dtype %s not supported" % cai.dtype) + if check_shape and len(cai.shape) != 2: + raise ValueError("Expected a 2D array, got %d D" % len(cai.shape)) + shape = (cai.shape[0], cai.shape[1] if len(cai.shape) == 2 else 1) + return make_device_matrix_view[const_int8_t, int64_t, row_major]( + cai.data, shape[0], shape[1]) + + cdef optional[device_matrix_view[int64_t, int64_t, row_major]] \ make_optional_view_int64(device_matrix_view[int64_t, int64_t, row_major]& dmv) except *: # noqa: E501 return make_optional[device_matrix_view[int64_t, int64_t, row_major]](dmv) @@ -222,7 +255,6 @@ cdef host_matrix_view[float, int64_t, row_major] \ return make_host_matrix_view[float, int64_t, row_major]( cai.data, shape[0], shape[1]) - cdef host_matrix_view[uint8_t, int64_t, row_major] \ get_hmv_uint8(cai, check_shape) except *: if cai.dtype != np.uint8: @@ -265,3 +297,36 @@ cdef host_matrix_view[uint32_t, int64_t, row_major] \ shape = (cai.shape[0], cai.shape[1] if len(cai.shape) == 2 else 1) return make_host_matrix_view[uint32_t, int64_t, row_major]( cai.data, shape[0], shape[1]) + + +cdef host_matrix_view[const_float, int64_t, row_major] \ + get_const_hmv_float(cai, check_shape) except *: + if cai.dtype != np.float32: + raise TypeError("dtype %s not supported" % cai.dtype) + if check_shape and len(cai.shape) != 2: + raise ValueError("Expected a 2D array, got %d D" % len(cai.shape)) + shape = (cai.shape[0], cai.shape[1] if len(cai.shape) == 2 else 1) + return make_host_matrix_view[const_float, int64_t, row_major]( + cai.data, shape[0], shape[1]) + + +cdef host_matrix_view[const_uint8_t, int64_t, row_major] \ + get_const_hmv_uint8(cai, check_shape) except *: + if cai.dtype != np.uint8: + raise TypeError("dtype %s not supported" % cai.dtype) + if check_shape and len(cai.shape) != 2: + raise ValueError("Expected a 2D array, got %d D" % len(cai.shape)) + shape = (cai.shape[0], cai.shape[1] if len(cai.shape) == 2 else 1) + return make_host_matrix_view[const_uint8_t, int64_t, row_major]( + cai.data, shape[0], shape[1]) + + +cdef host_matrix_view[const_int8_t, int64_t, row_major] \ + get_const_hmv_int8(cai, check_shape) except *: + if cai.dtype != np.int8: + raise TypeError("dtype %s not supported" % cai.dtype) + if check_shape and len(cai.shape) != 2: + raise ValueError("Expected a 2D array, got %d D" % len(cai.shape)) + shape = (cai.shape[0], cai.shape[1] if len(cai.shape) == 2 else 1) + return make_host_matrix_view[const_int8_t, int64_t, row_major]( + cai.data, shape[0], shape[1]) diff --git a/python/pylibraft/pylibraft/neighbors/cagra/cagra.pyx b/python/pylibraft/pylibraft/neighbors/cagra/cagra.pyx index fbc1623cac..e0c59a5ed3 100644 --- a/python/pylibraft/pylibraft/neighbors/cagra/cagra.pyx +++ 
b/python/pylibraft/pylibraft/neighbors/cagra/cagra.pyx @@ -69,6 +69,12 @@ from pylibraft.common.cpp.mdspan cimport ( row_major, ) from pylibraft.common.mdspan cimport ( + get_const_dmv_float, + get_const_dmv_int8, + get_const_dmv_uint8, + get_const_hmv_float, + get_const_hmv_int8, + get_const_hmv_uint8, get_dmv_float, get_dmv_int8, get_dmv_int64, @@ -162,6 +168,31 @@ cdef class IndexFloat(Index): attr_str = [m_str] + attr_str return "Index(type=CAGRA, " + (", ".join(attr_str)) + ")" + @auto_sync_handle + def update_dataset(self, dataset, handle=None): + """ Replace the dataset with a new dataset. + + Parameters + ---------- + dataset : array interface compliant matrix shape (n_samples, dim) + {handle_docstring} + """ + cdef device_resources* handle_ = \ + handle.getHandle() + + dataset_ai = wrap_array(dataset) + dataset_dt = dataset_ai.dtype + _check_input_array(dataset_ai, [np.dtype("float32")]) + + if dataset_ai.from_cai: + self.index[0].update_dataset(deref(handle_), + get_const_dmv_float(dataset_ai, + check_shape=True)) + else: + self.index[0].update_dataset(deref(handle_), + get_const_hmv_float(dataset_ai, + check_shape=True)) + @property def metric(self): return self.index[0].metric() @@ -195,6 +226,31 @@ cdef class IndexInt8(Index): self.index = new c_cagra.index[int8_t, uint32_t]( deref(handle_)) + @auto_sync_handle + def update_dataset(self, dataset, handle=None): + """ Replace the dataset with a new dataset. + + Parameters + ---------- + dataset : array interface compliant matrix shape (n_samples, dim) + {handle_docstring} + """ + cdef device_resources* handle_ = \ + handle.getHandle() + + dataset_ai = wrap_array(dataset) + dataset_dt = dataset_ai.dtype + _check_input_array(dataset_ai, [np.dtype("byte")]) + + if dataset_ai.from_cai: + self.index[0].update_dataset(deref(handle_), + get_const_dmv_int8(dataset_ai, + check_shape=True)) + else: + self.index[0].update_dataset(deref(handle_), + get_const_hmv_int8(dataset_ai, + check_shape=True)) + def __repr__(self): m_str = "metric=" + _get_metric_string(self.index.metric()) attr_str = [attr + "=" + str(getattr(self, attr)) @@ -235,6 +291,31 @@ cdef class IndexUint8(Index): self.index = new c_cagra.index[uint8_t, uint32_t]( deref(handle_)) + @auto_sync_handle + def update_dataset(self, dataset, handle=None): + """ Replace the dataset with a new dataset. + + Parameters + ---------- + dataset : array interface compliant matrix shape (n_samples, dim) + {handle_docstring} + """ + cdef device_resources* handle_ = \ + handle.getHandle() + + dataset_ai = wrap_array(dataset) + dataset_dt = dataset_ai.dtype + _check_input_array(dataset_ai, [np.dtype("ubyte")]) + + if dataset_ai.from_cai: + self.index[0].update_dataset(deref(handle_), + get_const_dmv_uint8(dataset_ai, + check_shape=True)) + else: + self.index[0].update_dataset(deref(handle_), + get_const_hmv_uint8(dataset_ai, + check_shape=True)) + def __repr__(self): m_str = "metric=" + _get_metric_string(self.index.metric()) attr_str = [attr + "=" + str(getattr(self, attr)) @@ -693,7 +774,7 @@ def search(SearchParams search_params, @auto_sync_handle -def save(filename, Index index, handle=None): +def save(filename, Index index, bool include_dataset=True, handle=None): """ Saves the index to a file. @@ -706,6 +787,12 @@ def save(filename, Index index, handle=None): Name of the file. index : Index Trained CAGRA index. + include_dataset : bool + Whether or not to write out the dataset along with the index. 
Including + the dataset in the serialized index will use extra disk space, and + might not be desired if you already have a copy of the dataset on + disk. If this option is set to false, you will have to call + `index.update_dataset(dataset)` after loading the index. {handle_docstring} Examples @@ -741,15 +828,17 @@ def save(filename, Index index, handle=None): if index.active_index_type == "float32": idx_float = index c_cagra.serialize_file( - deref(handle_), c_filename, deref(idx_float.index)) + deref(handle_), c_filename, deref(idx_float.index), + include_dataset) elif index.active_index_type == "byte": idx_int8 = index c_cagra.serialize_file( - deref(handle_), c_filename, deref(idx_int8.index)) + deref(handle_), c_filename, deref(idx_int8.index), include_dataset) elif index.active_index_type == "ubyte": idx_uint8 = index c_cagra.serialize_file( - deref(handle_), c_filename, deref(idx_uint8.index)) + deref(handle_), c_filename, deref(idx_uint8.index), + include_dataset) else: raise ValueError( "Index dtype %s not supported" % index.active_index_type) @@ -785,12 +874,9 @@ def load(filename, handle=None): cdef IndexInt8 idx_int8 cdef IndexUint8 idx_uint8 - # we extract the dtype from the array interfaces in the file - with open(filename, 'rb') as f: - type_str = f.read(700).decode("utf-8", errors='ignore') - - # Read description of the 6th element to get the datatype - dataset_dt = np.dtype(type_str.split('descr')[6][5:7]) + with open(filename, "rb") as f: + type_str = f.read(3).decode("utf8") + dataset_dt = np.dtype(type_str) if dataset_dt == np.float32: idx_float = IndexFloat(handle) diff --git a/python/pylibraft/pylibraft/neighbors/cagra/cpp/c_cagra.pxd b/python/pylibraft/pylibraft/neighbors/cagra/cpp/c_cagra.pxd index 284c75b771..0c683bcd9b 100644 --- a/python/pylibraft/pylibraft/neighbors/cagra/cpp/c_cagra.pxd +++ b/python/pylibraft/pylibraft/neighbors/cagra/cpp/c_cagra.pxd @@ -36,6 +36,7 @@ from pylibraft.common.cpp.mdspan cimport ( row_major, ) from pylibraft.common.handle cimport device_resources +from pylibraft.common.mdspan cimport const_float, const_int8_t, const_uint8_t from pylibraft.common.optional cimport optional from pylibraft.distance.distance_type cimport DistanceType from pylibraft.neighbors.ivf_pq.cpp.c_ivf_pq cimport ( @@ -90,6 +91,17 @@ cdef extern from "raft/neighbors/cagra_types.hpp" \ device_matrix_view[T, IdxT, row_major] dataset() device_matrix_view[T, IdxT, row_major] graph() + # hack: can't use the T template param here because of issues handling + # const w/ cython. 
introduce a new template param to get around this + void update_dataset[ValueT](const device_resources & handle, + host_matrix_view[ValueT, + int64_t, + row_major] dataset) + void update_dataset[ValueT](const device_resources & handle, + device_matrix_view[ValueT, + int64_t, + row_major] dataset) + cdef extern from "raft_runtime/neighbors/cagra.hpp" \ namespace "raft::runtime::neighbors::cagra" nogil: @@ -155,7 +167,8 @@ cdef extern from "raft_runtime/neighbors/cagra.hpp" \ cdef void serialize(const device_resources& handle, string& str, - const index[float, uint32_t]& index) except + + const index[float, uint32_t]& index, + bool include_dataset) except + cdef void deserialize(const device_resources& handle, const string& str, @@ -163,7 +176,8 @@ cdef extern from "raft_runtime/neighbors/cagra.hpp" \ cdef void serialize(const device_resources& handle, string& str, - const index[uint8_t, uint32_t]& index) except + + const index[uint8_t, uint32_t]& index, + bool include_dataset) except + cdef void deserialize(const device_resources& handle, const string& str, @@ -171,7 +185,8 @@ cdef extern from "raft_runtime/neighbors/cagra.hpp" \ cdef void serialize(const device_resources& handle, string& str, - const index[int8_t, uint32_t]& index) except + + const index[int8_t, uint32_t]& index, + bool include_dataset) except + cdef void deserialize(const device_resources& handle, const string& str, @@ -179,7 +194,8 @@ cdef extern from "raft_runtime/neighbors/cagra.hpp" \ cdef void serialize_file(const device_resources& handle, const string& filename, - const index[float, uint32_t]& index) except + + const index[float, uint32_t]& index, + bool include_dataset) except + cdef void deserialize_file(const device_resources& handle, const string& filename, @@ -187,7 +203,8 @@ cdef extern from "raft_runtime/neighbors/cagra.hpp" \ cdef void serialize_file(const device_resources& handle, const string& filename, - const index[uint8_t, uint32_t]& index) except + + const index[uint8_t, uint32_t]& index, + bool include_dataset) except + cdef void deserialize_file(const device_resources& handle, const string& filename, @@ -195,7 +212,8 @@ cdef extern from "raft_runtime/neighbors/cagra.hpp" \ cdef void serialize_file(const device_resources& handle, const string& filename, - const index[int8_t, uint32_t]& index) except + + const index[int8_t, uint32_t]& index, + bool include_dataset) except + cdef void deserialize_file(const device_resources& handle, const string& filename, diff --git a/python/pylibraft/pylibraft/test/test_cagra.py b/python/pylibraft/pylibraft/test/test_cagra.py index 435b2878a2..74e9f53b91 100644 --- a/python/pylibraft/pylibraft/test/test_cagra.py +++ b/python/pylibraft/pylibraft/test/test_cagra.py @@ -255,7 +255,8 @@ def test_cagra_search_params(params): @pytest.mark.parametrize("dtype", [np.float32, np.int8, np.ubyte]) -def test_save_load(dtype): +@pytest.mark.parametrize("include_dataset", [True, False]) +def test_save_load(dtype, include_dataset): n_rows = 10000 n_cols = 50 n_queries = 1000 @@ -268,9 +269,14 @@ def test_save_load(dtype): assert index.trained filename = "my_index.bin" - cagra.save(filename, index) + cagra.save(filename, index, include_dataset=include_dataset) loaded_index = cagra.load(filename) + # if we didn't save the dataset with the index, we need to update the + # index with an already loaded copy + if not include_dataset: + loaded_index.update_dataset(dataset) + queries = generate_data((n_queries, n_cols), dtype) queries_device = device_ndarray(queries) diff --git 
a/python/raft-dask/pyproject.toml b/python/raft-dask/pyproject.toml index 3c81b6f16b..bdbcf61e0f 100644 --- a/python/raft-dask/pyproject.toml +++ b/python/raft-dask/pyproject.toml @@ -35,8 +35,8 @@ license = { text = "Apache 2.0" } requires-python = ">=3.9" dependencies = [ "dask-cuda==23.10.*", - "dask==2023.7.1", - "distributed==2023.7.1", + "dask>=2023.7.1", + "distributed>=2023.7.1", "joblib>=0.11", "numba>=0.57", "numpy>=1.21",