diff --git a/.gitignore b/.gitignore index 5bb32fd430..fce749bd14 100644 --- a/.gitignore +++ b/.gitignore @@ -1,11 +1,14 @@ -build/ +.eggs/ .idea +__pycache__/ _skbuild/ +build/ +cpp/src/legate_library.cc +cpp/src/legate_library.h +cpp/src/legate_library.h.eggs/ dist/ legate.raft.egg-info/ legate/raft/__pycache__/ -legate/raft/library.py legate/raft/install_info.py +legate/raft/library.py pytest/__pycache__ -cpp/src/legate_library.cc -cpp/src/legate_library.h \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000000..c5ae9108f1 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,30 @@ +--- +# Copyright (c) 2023, NVIDIA CORPORATION. + +repos: + - repo: https://github.com/psf/black + rev: 23.1.0 + hooks: + - id: black + exclude: 'legate/naive_bayes/library\.py$' + - repo: https://github.com/PyCQA/flake8 + rev: 6.0.0 + hooks: + - id: flake8 + exclude: 'legate/naive_bayes/library\.py$' + - repo: https://github.com/codespell-project/codespell + rev: v2.2.4 + hooks: + - id: codespell + additional_dependencies: [tomli] + - repo: https://github.com/pycqa/isort + rev: 5.12.0 + hooks: + - id: isort + - repo: https://github.com/kynan/nbstripout + rev: 0.6.1 + hooks: + - id: nbstripout + +default_language_version: + python: python3 diff --git a/README.md b/README.md index 1e4c36a113..c0d1ec4414 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,7 @@ mamba install -c conda-forge -c rapidsai-nightly -c nvidia libraft==23.04* - Can potentially copy in `bincount` from legate.core or the `raft::stats::histogram` or other RAFT primitives. - maybe we could try to use pylibraft as a legate task if they make progress on exposing legate tasks through Python UDFs? +- Interested in an end-to-end prototype that uses the underlying indices array from the legate.sparse csr w/ bincount/histogram primitive. 
### K-NN diff --git a/cpp/src/CMakeLists.txt b/cpp/src/CMakeLists.txt index 4efce027d3..b522fdc18f 100644 --- a/cpp/src/CMakeLists.txt +++ b/cpp/src/CMakeLists.txt @@ -3,13 +3,29 @@ legate_cpp_library_template(legate_raft TEMPLATE_SOURCES) add_library( legate_raft - # RAFT API wrappers + # RAFT API Wrappers + raft/histogram.cu + raft/distance.cu raft/raft_knn.cu raft/raft_knn_merge.cu # Legate tasks - legate_tasks/knn_task.cc - legate_tasks/knn_merge_task.cc + task/add.cc + # task/compute_1nn.cc + task/add_constant.cc + task/bincount.cc + task/categorize.cc + task/convert.cc + task/exp.cc + task/fill.cc + task/find_max.cc + task/histogram.cc + task/knn_task.cc + task/knn_merge_task.cc + task/log.cc + task/matmul.cc + task/mul.cc + task/sum_over_axis.cc # Library templates ${TEMPLATE_SOURCES} diff --git a/cpp/src/legate_raft_cffi.h b/cpp/src/legate_raft_cffi.h index eb004ac3f6..06380223ac 100644 --- a/cpp/src/legate_raft_cffi.h +++ b/cpp/src/legate_raft_cffi.h @@ -1,5 +1,19 @@ enum LegateRaftOpCode { _OP_CODE_BASE = 0, - RAFT_KNN = 1, - RAFT_KNN_MERGE = 2, -}; \ No newline at end of file + ADD, + ADD_CONSTANT, + BINCOUNT, + CATEGORIZE, + CONVERT, + EXP, + FILL, + FIND_MAX, + FUSED_1NN, + HISTOGRAM, + LOG, + MATMUL, + MUL, + RAFT_KNN, + RAFT_KNN_MERGE, + SUM_OVER_AXIS, +}; diff --git a/cpp/src/legate_tasks/histogram.cc b/cpp/src/legate_tasks/histogram.cc deleted file mode 100644 index 742b632be2..0000000000 --- a/cpp/src/legate_tasks/histogram.cc +++ /dev/null @@ -1,27 +0,0 @@ -#include "../raft/raft_api.hpp" - -#include "../legate_raft.h" -#include "../legate_library.h" - -namespace legate_raft { - - // FUSED_1NN comes from - class HistogramTask : public Task { - public: - static void gpu_variant(legate::TaskContext& context) - { - test_histogram(); - } - }; - -} // namespace legate_raft - -namespace // unnamed -{ - - static void __attribute__((constructor)) register_tasks(void) - { - legate_raft::HistogramTask::register_variants(); - } - -} // namespace \ No 
newline at end of file diff --git a/cpp/src/task/add.cc b/cpp/src/task/add.cc new file mode 100644 index 0000000000..c07eea1f06 --- /dev/null +++ b/cpp/src/task/add.cc @@ -0,0 +1,72 @@ +/* Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include + +#include "legate_library.h" +#include "legate_raft_cffi.h" + +#include "core/utilities/dispatch.h" + +namespace legate_raft { + +namespace { + +struct add_fn { + template + void operator()(legate::Store& output, legate::Store& x1, legate::Store& x2) + { + using VAL = legate::legate_type_of; + + auto shape = x1.shape(); + + if (shape.empty()) return; + + auto x1_acc = x1.read_accessor(); + auto x2_acc = x2.read_accessor(); + auto output_acc = output.write_accessor(); + + for (legate::PointInRectIterator it(shape, false /*fortran order*/); it.valid(); ++it) { + auto p = *it; + output_acc[p] = x1_acc[p] + x2_acc[p]; + } + } +}; + +} // namespace + +class AddTask : public Task { + public: + static void cpu_variant(legate::TaskContext& context) + { + auto& input1 = context.inputs()[0]; + auto& input2 = context.inputs()[1]; + auto& output = context.outputs()[0]; + + legate::double_dispatch(input1.dim(), input1.code(), add_fn{}, output, input1, input2); + } +}; + +} // namespace legate_raft + +namespace { + +static void __attribute__((constructor)) register_tasks() +{ + legate_raft::AddTask::register_variants(); +} + +} // namespace diff --git a/cpp/src/task/add_constant.cc 
b/cpp/src/task/add_constant.cc new file mode 100644 index 0000000000..5f4318a1a5 --- /dev/null +++ b/cpp/src/task/add_constant.cc @@ -0,0 +1,69 @@ +/* Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "legate_library.h" +#include "legate_raft_cffi.h" + +#include "core/utilities/dispatch.h" + +namespace legate_raft { + +namespace { + +struct add_constant_fn { + template + void operator()(legate::Store& output, legate::Store& input, legate::Scalar& value) + { + using VAL = legate::legate_type_of; + + auto shape = input.shape(); + + if (shape.empty()) return; + + auto input_acc = input.read_accessor(); + auto output_acc = output.write_accessor(); + + for (legate::PointInRectIterator it(shape, false /*fortran order*/); it.valid(); ++it) { + auto p = *it; + output_acc[p] = input_acc[p] + value.value(); + } + } +}; + +} // namespace + +class AddConstantTask : public Task { + public: + static void cpu_variant(legate::TaskContext& context) + { + auto& input = context.inputs()[0]; + auto& value = context.scalars()[0]; + auto& output = context.outputs()[0]; + + legate::double_dispatch(input.dim(), input.code(), add_constant_fn{}, output, input, value); + } +}; + +} // namespace legate_raft + +namespace { + +static void __attribute__((constructor)) register_tasks() +{ + legate_raft::AddConstantTask::register_variants(); +} + +} // namespace diff --git a/cpp/src/task/bincount.cc b/cpp/src/task/bincount.cc new file 
mode 100644 index 0000000000..0426192274 --- /dev/null +++ b/cpp/src/task/bincount.cc @@ -0,0 +1,55 @@ +/* Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "legate_library.h" +#include "legate_raft_cffi.h" + +#include "core/utilities/dispatch.h" + +namespace legate_raft { + +class BincountTask : public Task { + public: + static void cpu_variant(legate::TaskContext& context) + { + auto& input = context.inputs()[0]; + auto& output = context.reductions()[0]; + + auto in_shape = input.shape<1>(); + auto out_shape = output.shape<1>(); + + auto in_acc = input.read_accessor(); + auto out_acc = output.reduce_accessor, true, 1>(); + + for (legate::PointInRectIterator<1> it(in_shape); it.valid(); ++it) { + auto& value = in_acc[*it]; + legate::Point<1> pos_reduce(static_cast(value)); + + if (out_shape.contains(pos_reduce)) out_acc.reduce(pos_reduce, 1); + } + } +}; + +} // namespace legate_raft + +namespace { + +static void __attribute__((constructor)) register_tasks() +{ + legate_raft::BincountTask::register_variants(); +} + +} // namespace diff --git a/cpp/src/task/categorize.cc b/cpp/src/task/categorize.cc new file mode 100644 index 0000000000..ab188befed --- /dev/null +++ b/cpp/src/task/categorize.cc @@ -0,0 +1,89 @@ +/* Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "legate_library.h" +#include "legate_raft_cffi.h" + +#include "core/utilities/dispatch.h" +#include "core/utilities/type_traits.h" + +namespace legate_raft { + +namespace { + +struct categorize_fn { + template ::value>* = nullptr> + void operator()(legate::Store& result, legate::Store& input, legate::Store& bins) + { + using VAL = legate::legate_type_of; + + auto in_shape = result.shape<1>(); + auto bin_shape = bins.shape<1>(); + + assert(!bin_shape.empty()); + if (in_shape.empty()) return; + + auto num_bins = bin_shape.hi[0] - bin_shape.lo[0]; + + auto in_acc = input.read_accessor(); + auto bin_acc = bins.read_accessor(); + auto res_acc = result.write_accessor(); + + for (legate::PointInRectIterator<1> it(in_shape); it.valid(); ++it) { + auto p = *it; + auto& value = in_acc[p]; + for (auto bin_idx = 0; bin_idx < num_bins; ++bin_idx) { + if (bin_acc[bin_idx] <= value && value < bin_acc[bin_idx + 1]) { + res_acc[p] = static_cast(bin_idx); + break; + } + } + } + } + + template ::value>* = nullptr> + void operator()(legate::Store& result, legate::Store& input, legate::Store& bins) + { + assert(false); + } +}; + +} // namespace + +class CategorizeTask : public Task { + public: + static void cpu_variant(legate::TaskContext& context) + { + auto& input = context.inputs()[0]; + auto& bins = context.inputs()[1]; + auto& result = context.outputs()[0]; + + legate::type_dispatch(input.code(), categorize_fn{}, result, input, bins); + } +}; + +} // namespace legate_raft + +namespace { + +static void __attribute__((constructor)) 
register_tasks() +{ + legate_raft::CategorizeTask::register_variants(); +} + +} // namespace diff --git a/cpp/src/legate_tasks/compute_1nn.cc b/cpp/src/task/compute_1nn.cc similarity index 97% rename from cpp/src/legate_tasks/compute_1nn.cc rename to cpp/src/task/compute_1nn.cc index 7439af105f..2a0c630c5f 100644 --- a/cpp/src/legate_tasks/compute_1nn.cc +++ b/cpp/src/task/compute_1nn.cc @@ -23,4 +23,4 @@ namespace // unnamed legate_raft::Compute1NNTask::register_variants(); } -} // namespace \ No newline at end of file +} // namespace diff --git a/cpp/src/task/convert.cc b/cpp/src/task/convert.cc new file mode 100644 index 0000000000..56b76930f1 --- /dev/null +++ b/cpp/src/task/convert.cc @@ -0,0 +1,93 @@ +/* Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +#include + +#include "legate_library.h" +#include "legate_raft_cffi.h" + +#include "core/utilities/dispatch.h" +#include "core/utilities/typedefs.h" + +namespace legate_raft { + +namespace { + +template +struct convert_fn { + template + void operator()(legate::Store& output, legate::Store& input) + { + using SRC = legate::legate_type_of; + using DST = legate::legate_type_of; + + auto shape = input.shape(); + + if (shape.empty()) return; + + auto input_acc = input.read_accessor(); + auto output_acc = output.write_accessor(); + + for (legate::PointInRectIterator it(shape, false /*fortran order*/); it.valid(); ++it) { + auto p = *it; + output_acc[p] = static_cast(input_acc[p]); + } + } +}; + +} // namespace + +class ConvertTask : public Task { + public: + static void cpu_variant(legate::TaskContext& context) + { + auto& input = context.inputs()[0]; + auto& output = context.outputs()[0]; + + switch (input.code()) { + case legate::LegateTypeCode::INT64_LT: + switch(output.code()) { + case legate::LegateTypeCode::FLOAT_LT: + return legate::dim_dispatch( + input.dim(), + convert_fn{}, + output, input + ); + case legate::LegateTypeCode::DOUBLE_LT: + return legate::dim_dispatch( + input.dim(), + convert_fn{}, + output, input + ); + default: + throw(output.code()); // output type not supported + } + default: + throw(input.code()); // input type not supported + } + } +}; + +} // namespace legate_raft + +namespace { + +static void __attribute__((constructor)) register_tasks() +{ + legate_raft::ConvertTask::register_variants(); +} + +} // namespace diff --git a/cpp/src/task/exp.cc b/cpp/src/task/exp.cc new file mode 100644 index 0000000000..f952845bf7 --- /dev/null +++ b/cpp/src/task/exp.cc @@ -0,0 +1,70 @@ +/* Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include + +#include "legate_library.h" +#include "legate_raft_cffi.h" + +#include "core/utilities/dispatch.h" + +namespace legate_raft { + +namespace { + +struct exp_fn { + template + void operator()(legate::Store& output, legate::Store& input) + { + using VAL = legate::legate_type_of; + + auto shape = input.shape(); + + if (shape.empty()) return; + + auto input_acc = input.read_accessor(); + auto output_acc = output.write_accessor(); + + for (legate::PointInRectIterator it(shape, false /*fortran order*/); it.valid(); ++it) { + auto p = *it; + output_acc[p] = exp(input_acc[p]); + } + } +}; + +} // namespace + +class ExpTask : public Task { + public: + static void cpu_variant(legate::TaskContext& context) + { + auto& input = context.inputs()[0]; + auto& output = context.outputs()[0]; + + legate::double_dispatch(input.dim(), input.code(), exp_fn{}, output, input); + } +}; + +} // namespace legate_raft + +namespace { + +static void __attribute__((constructor)) register_tasks() +{ + legate_raft::ExpTask::register_variants(); +} + +} // namespace diff --git a/cpp/src/task/fill.cc b/cpp/src/task/fill.cc new file mode 100644 index 0000000000..007b1af30d --- /dev/null +++ b/cpp/src/task/fill.cc @@ -0,0 +1,67 @@ +/* Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "legate_library.h" +#include "legate_raft_cffi.h" + +#include "core/utilities/dispatch.h" + +namespace legate_raft { + +namespace { + +struct fill_fn { + template + void operator()(legate::Store& output, legate::Scalar& value) + { + using VAL = legate::legate_type_of; + + auto shape = output.shape(); + + if (shape.empty()) return; + + auto output_acc = output.write_accessor(); + + for (legate::PointInRectIterator it(shape, false /*fortran order*/); it.valid(); ++it) { + auto p = *it; + output_acc[p] = value.value(); + } + } +}; + +} // namespace + +class FillTask : public Task { + public: + static void cpu_variant(legate::TaskContext& context) + { + auto& value = context.scalars()[0]; + auto& output = context.outputs()[0]; + + legate::double_dispatch(output.dim(), output.code(), fill_fn{}, output, value); + } +}; + +} // namespace legate_raft + +namespace { + +static void __attribute__((constructor)) register_tasks() +{ + legate_raft::FillTask::register_variants(); +} + +} // namespace diff --git a/cpp/src/task/find_max.cc b/cpp/src/task/find_max.cc new file mode 100644 index 0000000000..b17cc95485 --- /dev/null +++ b/cpp/src/task/find_max.cc @@ -0,0 +1,96 @@ +/* Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "legate_library.h" +#include "legate_raft_cffi.h" + +#include "core/utilities/dispatch.h" +#include "core/utilities/typedefs.h" + +namespace legate_raft { + +namespace { + +struct find_max_fn { + template + void operator()(legate::Store& output, legate::Store& input) + { + using VAL = legate::legate_type_of; + + auto shape = input.shape(); + + if (shape.empty()) return; + + auto in_acc = input.read_accessor(); + auto red_acc = output.reduce_accessor, true, DIM>(); + + for (legate::PointInRectIterator it(shape, false /*fortran_order*/); it.valid(); ++it) { + auto p = *it; + // Coordinates of the contracting dimension are ignored by red_acc via an affine + // transformation. For example, if the store was 3D and the second dimension was contracted, + // each point p will go through the following affine transformation to recover the point in + // the domain prior to the promotion: + // + // | 1 0 0 | | x | + // | | * | y | + // | 0 0 1 | | z | + // + // where the "*" operator denotes a matrix-vector multiplication. 
+ red_acc.reduce(p, in_acc[p]); + } + } +}; + + +template +struct find_max_fn_outer { + template + void operator()(legate::Store& output, legate::Store& input) + { + find_max_fn{}.operator()(output, input); + } +}; + + +} // namespace + +class FindMaxTask : public Task { + public: + static void cpu_variant(legate::TaskContext& context) + { + auto& input = context.inputs()[0]; + auto& output = context.reductions()[0]; + + switch(input.code()) { + case legate::LegateTypeCode::DOUBLE_LT: { + return legate::dim_dispatch(input.dim(), find_max_fn_outer{}, output, input); + default: + throw("Input type not supported."); + } + } + } +}; + +} // namespace legate_raft + +namespace { + +static void __attribute__((constructor)) register_tasks() +{ + legate_raft::FindMaxTask::register_variants(); +} + +} // namespace diff --git a/cpp/src/task/histogram.cc b/cpp/src/task/histogram.cc new file mode 100644 index 0000000000..8af2d5f99e --- /dev/null +++ b/cpp/src/task/histogram.cc @@ -0,0 +1,87 @@ +/* Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +#include "legate_library.h" +#include "legate_raft_cffi.h" + +#include "core/utilities/dispatch.h" +#include "core/utilities/type_traits.h" + +namespace legate_raft { + +namespace { + +struct histogram_fn { + template ::value>* = nullptr> + void operator()(legate::Store& result, legate::Store& input, legate::Store& bins) + { + using VAL = legate::legate_type_of; + + auto in_shape = input.shape<1>(); + auto bin_shape = bins.shape<1>(); + + assert(!bin_shape.empty()); + if (in_shape.empty()) return; + + auto num_bins = bin_shape.hi[0] - bin_shape.lo[0]; + + auto in_acc = input.read_accessor(); + auto bin_acc = bins.read_accessor(); + auto res_acc = result.reduce_accessor, true, 1>(); + + for (legate::PointInRectIterator<1> it(in_shape); it.valid(); ++it) { + auto& value = in_acc[*it]; + for (auto bin_idx = 0; bin_idx < num_bins; ++bin_idx) + if (bin_acc[bin_idx] <= value && value < bin_acc[bin_idx + 1]) { + res_acc.reduce(bin_idx, 1); + break; + } + } + } + + template ::value>* = nullptr> + void operator()(legate::Store& result, legate::Store& input, legate::Store& bins) + { + assert(false); + } +}; + +} // namespace + +class HistogramTask : public Task { + public: + static void cpu_variant(legate::TaskContext& context) + { + auto& input = context.inputs()[0]; + auto& bins = context.inputs()[1]; + auto& result = context.reductions()[0]; + + legate::type_dispatch(input.code(), histogram_fn{}, result, input, bins); + } +}; + +} // namespace legate_raft + +namespace { + +static void __attribute__((constructor)) register_tasks() +{ + legate_raft::HistogramTask::register_variants(); +} + +} // namespace diff --git a/cpp/src/legate_tasks/knn_merge_task.cc b/cpp/src/task/knn_merge_task.cc similarity index 100% rename from cpp/src/legate_tasks/knn_merge_task.cc rename to cpp/src/task/knn_merge_task.cc diff --git a/cpp/src/legate_tasks/knn_task.cc b/cpp/src/task/knn_task.cc similarity index 100% rename from cpp/src/legate_tasks/knn_task.cc rename to 
cpp/src/task/knn_task.cc diff --git a/cpp/src/task/log.cc b/cpp/src/task/log.cc new file mode 100644 index 0000000000..22bff125a6 --- /dev/null +++ b/cpp/src/task/log.cc @@ -0,0 +1,70 @@ +/* Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include + +#include "legate_library.h" +#include "legate_raft_cffi.h" + +#include "core/utilities/dispatch.h" + +namespace legate_raft { + +namespace { + +struct log_fn { + template + void operator()(legate::Store& output, legate::Store& input) + { + using VAL = legate::legate_type_of; + + auto shape = input.shape(); + + if (shape.empty()) return; + + auto input_acc = input.read_accessor(); + auto output_acc = output.write_accessor(); + + for (legate::PointInRectIterator it(shape, false /*fortran order*/); it.valid(); ++it) { + auto p = *it; + output_acc[p] = log(input_acc[p]); + } + } +}; + +} // namespace + +class LogTask : public Task { + public: + static void cpu_variant(legate::TaskContext& context) + { + auto& input = context.inputs()[0]; + auto& output = context.outputs()[0]; + + legate::double_dispatch(input.dim(), input.code(), log_fn{}, output, input); + } +}; + +} // namespace legate_raft + +namespace { + +static void __attribute__((constructor)) register_tasks() +{ + legate_raft::LogTask::register_variants(); +} + +} // namespace diff --git a/cpp/src/task/matmul.cc b/cpp/src/task/matmul.cc new file mode 100644 index 0000000000..eb9c8bdc1e --- /dev/null +++ 
b/cpp/src/task/matmul.cc @@ -0,0 +1,70 @@ +/* Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "legate_library.h" +#include "legate_raft_cffi.h" + +#include "core/utilities/dispatch.h" + +namespace legate_raft { + +namespace { + +struct matmul_fn { + template + void operator()(legate::Store& lhs, legate::Store& rhs1, legate::Store& rhs2) + { + using VAL = legate::legate_type_of; + + auto shape = rhs1.shape<3>().intersection(rhs2.shape<3>()); + + if (shape.empty()) return; + + auto rhs1_acc = rhs1.read_accessor(); + auto rhs2_acc = rhs2.read_accessor(); + auto lhs_acc = lhs.reduce_accessor, true, 3>(); + + for (legate::PointInRectIterator<3> it(shape, false /*fortran_order*/); it.valid(); ++it) { + auto p = *it; + lhs_acc.reduce(p, rhs1_acc[p] * rhs2_acc[p]); + } + } +}; + +} // namespace + +class MatMulTask : public Task { + public: + static void cpu_variant(legate::TaskContext& context) + { + auto& rhs1 = context.inputs()[0]; + auto& rhs2 = context.inputs()[1]; + auto& lhs = context.reductions()[0]; + + legate::type_dispatch(lhs.code(), matmul_fn{}, lhs, rhs1, rhs2); + } +}; + +} // namespace legate_raft + +namespace { + +static void __attribute__((constructor)) register_tasks() +{ + legate_raft::MatMulTask::register_variants(); +} + +} // namespace diff --git a/cpp/src/task/mul.cc b/cpp/src/task/mul.cc new file mode 100644 index 0000000000..e71715c7e5 --- /dev/null +++ b/cpp/src/task/mul.cc @@ -0,0 
+1,70 @@ +/* Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "legate_library.h" +#include "legate_raft_cffi.h" + +#include "core/utilities/dispatch.h" + +namespace legate_raft { + +namespace { + +struct mul_fn { + template + void operator()(legate::Store& lhs, legate::Store& rhs1, legate::Store& rhs2) + { + using VAL = legate::legate_type_of; + + auto shape = lhs.shape(); + + if (shape.empty()) return; + + auto rhs1_acc = rhs1.read_accessor(); + auto rhs2_acc = rhs2.read_accessor(); + auto lhs_acc = lhs.write_accessor(); + + for (legate::PointInRectIterator it(shape, false /*fortran_order*/); it.valid(); ++it) { + auto p = *it; + lhs_acc[p] = rhs1_acc[p] * rhs2_acc[p]; + } + } +}; + +} // namespace + +class MultiplyTask : public Task { + public: + static void cpu_variant(legate::TaskContext& context) + { + auto& rhs1 = context.inputs()[0]; + auto& rhs2 = context.inputs()[1]; + auto& lhs = context.outputs()[0]; + + legate::double_dispatch(lhs.dim(), lhs.code(), mul_fn{}, lhs, rhs1, rhs2); + } +}; + +} // namespace legate_raft + +namespace { + +static void __attribute__((constructor)) register_tasks() +{ + legate_raft::MultiplyTask::register_variants(); +} + +} // namespace diff --git a/cpp/src/task/sum_over_axis.cc b/cpp/src/task/sum_over_axis.cc new file mode 100644 index 0000000000..e914c2c7c2 --- /dev/null +++ b/cpp/src/task/sum_over_axis.cc @@ -0,0 +1,78 @@ +/* Copyright 2023 NVIDIA Corporation + * + 
* Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "legate_library.h" +#include "legate_raft_cffi.h" + +#include "core/utilities/dispatch.h" + +namespace legate_raft { + +namespace { + +struct reduction_fn { + template + void operator()(legate::Store& output, legate::Store& input) + { + using VAL = legate::legate_type_of; + + auto shape = input.shape(); + + if (shape.empty()) return; + + auto in_acc = input.read_accessor(); + auto red_acc = output.reduce_accessor, true, DIM>(); + + for (legate::PointInRectIterator it(shape, false /*fortran_order*/); it.valid(); ++it) { + auto p = *it; + // Coordinates of the contracting dimension are ignored by red_acc via an affine + // transformation. For example, if the store was 3D and the second dimension was contracted, + // each point p will go through the following affine transformation to recover the point in + // the domain prior to the promotion: + // + // | 1 0 0 | | x | + // | | * | y | + // | 0 0 1 | | z | + // + // where the "*" operator denotes a matrix-vector multiplication. 
+ red_acc.reduce(p, in_acc[p]); + } + } +}; + +} // namespace + +class SumOverAxisTask : public Task { + public: + static void cpu_variant(legate::TaskContext& context) + { + auto& input = context.inputs()[0]; + auto& output = context.reductions()[0]; + + legate::double_dispatch(input.dim(), input.code(), reduction_fn{}, output, input); + } +}; + +} // namespace legate_raft + +namespace { + +static void __attribute__((constructor)) register_tasks() +{ + legate_raft::SumOverAxisTask::register_variants(); +} + +} // namespace diff --git a/legate/raft/__init__.py b/legate/raft/__init__.py index 1252579f60..e6494a0ab2 100644 --- a/legate/raft/__init__.py +++ b/legate/raft/__init__.py @@ -13,6 +13,25 @@ # limitations under the License. # +from .array_api import add, exp, fill, log, negative, subtract, sum_over_axis +from .core import as_array, as_store, convert +from .multiarray import bincount, categorize, matmul, multiply from .knn import run_knn -__all__ = ["run_knn"] +__all__ = [ + "add", + "as_array", + "as_store", + "bincount", + "categorize", + "convert", + "exp", + "fill", + "log", + "matmul", + "multiply", + "negative", + "run_knn", + "subtract", + "sum_over_axis", +] diff --git a/legate/raft/array_api.py b/legate/raft/array_api.py new file mode 100644 index 0000000000..2fa748208b --- /dev/null +++ b/legate/raft/array_api.py @@ -0,0 +1,222 @@ +# Copyright 2023 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# Copyright 2023 NVIDIA Corporation
# SPDX-License-Identifier: Apache-2.0
#
# legate/raft/array_api.py — store-based, array-API-style primitives,
# each dispatched as a Legate auto task.


from numbers import Number

import numpy as np
import pyarrow as pa

import legate.core.types as ty
from legate.core import Store
from legate.raft.cffi import OpCode
from legate.raft.library import user_context as context
from legate.raft.util import promote


def fill(shape, fill_value, dtype=None) -> Store:
    """Create a store of ``shape`` filled with ``fill_value``.

    When ``dtype`` is None it is inferred from ``fill_value``; plain Python
    scalars are first coerced through numpy to obtain a dtype.
    """
    if dtype is None:
        try:
            dtype = pa.from_numpy_dtype(fill_value.dtype)
        except AttributeError:
            # Plain scalars carry no .dtype attribute.
            fill_value = np.asanyarray(fill_value)
            dtype = pa.from_numpy_dtype(fill_value.dtype)

    result = context.create_store(dtype, shape)
    assert result.type == dtype

    task = context.create_auto_task(OpCode.FILL)
    task.add_output(result)
    task.add_scalar_arg(fill_value, result.type)
    task.execute()

    return result


def _sanitize_axis(axis: int, ndim: int) -> int:
    """Normalize a possibly-negative ``axis`` into [0, ndim)."""
    sanitized = axis
    if sanitized < 0:
        sanitized += ndim
    if sanitized < 0 or sanitized >= ndim:
        raise ValueError(f"Invalid axis {axis} for a {ndim}-D store")
    return sanitized


def sum_over_axis(input: Store, axis: int) -> Store:
    """
    Sum values along the chosen axis

    Parameters
    ----------
    input : Store
        Input to sum
    axis : int
        Axis along which the summation should be done (may be negative)

    Returns
    -------
    Store
        Summation result
    """
    sanitized = _sanitize_axis(axis, input.ndim)

    # Compute the output shape by removing the axis being summed over
    res_shape = tuple(ext for dim, ext in enumerate(input.shape) if dim != sanitized)
    result = fill(res_shape, 0, dtype=input.type)

    # Broadcast the output along the contracting dimension.
    # BUGFIX: use the sanitized axis here; the raw ``axis`` may be negative
    # and must not be handed straight to Store.promote.
    promoted = result.promote(sanitized, input.shape[sanitized])

    assert promoted.shape == input.shape

    task = context.create_auto_task(OpCode.SUM_OVER_AXIS)
    task.add_input(input)
    task.add_reduction(promoted, ty.ReductionOp.ADD)
    task.add_alignment(input, promoted)

    task.execute()

    return result


def _add_constant(input: Store, value: Number) -> Store:
    """Element-wise ``input + value`` for a scalar ``value``."""
    result = context.create_store(input.type, input.shape)

    task = context.create_auto_task(OpCode.ADD_CONSTANT)
    task.add_input(input)
    task.add_scalar_arg(value, input.type)
    task.add_output(result)
    task.add_alignment(input, result)

    task.execute()

    return result


def log(input: Store) -> Store:
    """Element-wise natural logarithm of ``input``."""
    result = context.create_store(input.type, input.shape)

    task = context.create_auto_task(OpCode.LOG)
    task.add_input(input)
    task.add_output(result)
    task.add_alignment(input, result)

    task.execute()

    return result


def exp(input: Store) -> Store:
    """Element-wise exponential of ``input``."""
    result = context.create_store(input.type, input.shape)

    task = context.create_auto_task(OpCode.EXP)
    task.add_input(input)
    task.add_output(result)
    task.add_alignment(input, result)

    task.execute()

    return result


def _add_stores(x1: Store, x2: Store) -> Store:
    """Element-wise sum of two stores of identical shape."""
    result = context.create_store(x1.type, x1.shape)

    task = context.create_auto_task(OpCode.ADD)
    task.add_input(x1)
    task.add_input(x2)
    task.add_output(result)
    task.add_alignment(x1, x2)
    task.add_alignment(x1, result)

    task.execute()

    return result


def _add_broadcast(x1: Store, x2: Store) -> Store:
    """Element-wise sum where ``x2`` is broadcast (promoted) to ``x1.shape``."""

    def func(dim, dim_size):
        nonlocal x2
        x2 = x2.promote(dim, dim_size)

    promote(x2.shape, x1.shape, func)
    assert x1.shape == x2.shape

    result = context.create_store(x1.type, x1.shape)
    task = context.create_auto_task(OpCode.ADD)
    task.add_input(x1)
    task.add_input(x2)
    task.add_alignment(x1, x2)
    task.add_output(result)
    task.add_alignment(x1, result)

    task.execute()

    return result


def add(x1: Store | Number, x2: Store | Number) -> Store | Number:
    """Element-wise ``x1 + x2`` with scalar and broadcast support."""
    if isinstance(x1, Number):
        if isinstance(x2, Number):
            return x1 + x2  # native function
        else:
            return add(x2, x1)  # swap operands

    elif isinstance(x2, Number):
        return _add_constant(x1, x2)
    elif x1.shape == x2.shape:
        return _add_stores(x1, x2)
    else:
        return _add_broadcast(x1, x2)


def negative(lhs: Store) -> Store:
    """Element-wise negation, implemented as multiplication by -1."""
    minus_one = fill(lhs.shape, lhs.type.type.to_pandas_dtype()(-1))
    result = context.create_store(lhs.type, lhs.shape)

    task = context.create_auto_task(OpCode.MUL)
    task.add_input(lhs)
    task.add_input(minus_one)
    task.add_alignment(lhs, minus_one)
    task.add_alignment(lhs, result)
    task.add_output(result)
    task.execute()

    return result


def subtract(x1: Store | Number, x2: Store | Number) -> Store | Number:
    """Element-wise ``x1 - x2`` built from ``add`` and ``negative``."""
    if isinstance(x1, Number) and isinstance(x2, Number):
        return x1 - x2  # native function
    else:
        return add(x1, negative(x2))


def max(x: Store, axis: int) -> Store:
    """MAX-reduce ``x`` along ``axis``.

    NOTE: intentionally shadows builtins.max within this module.
    BUGFIX: return annotation corrected from ``Number`` — this returns
    the reduced Store, not a scalar.
    """
    sanitized = _sanitize_axis(axis, x.ndim)

    # Identity element for MAX.
    # NOTE(review): np.finfo assumes a floating-point dtype — confirm
    # callers never pass integer-typed stores.
    limit_min = np.finfo(x.type.type.to_pandas_dtype()).min
    res_shape = tuple(ext for dim, ext in enumerate(x.shape) if dim != sanitized)
    result = fill(res_shape, limit_min, x.type)

    # BUGFIX: use the sanitized axis (see sum_over_axis).
    promoted = result.promote(sanitized, x.shape[sanitized])
    assert promoted.shape == x.shape

    task = context.create_auto_task(OpCode.FIND_MAX)
    task.add_input(x)
    task.add_reduction(promoted, ty.ReductionOp.MAX)
    task.add_alignment(x, promoted)

    task.execute()

    return result


# --- legate/raft/cffi.py -------------------------------------------------

from enum import IntEnum

from .library import user_lib


class OpCode(IntEnum):
    """Task op codes mirrored from the C++ LegateRaftOpCode enum."""

    ADD = user_lib.cffi.ADD
    ADD_CONSTANT = user_lib.cffi.ADD_CONSTANT
    BINCOUNT = user_lib.cffi.BINCOUNT
    CATEGORIZE = user_lib.cffi.CATEGORIZE
    CONVERT = user_lib.cffi.CONVERT
    EXP = user_lib.cffi.EXP
    FILL = user_lib.cffi.FILL
    FIND_MAX = user_lib.cffi.FIND_MAX
    HISTOGRAM = user_lib.cffi.HISTOGRAM
    LOG = user_lib.cffi.LOG
    MATMUL = user_lib.cffi.MATMUL
    MUL = user_lib.cffi.MUL
    SUM_OVER_AXIS = user_lib.cffi.SUM_OVER_AXIS
pyarrow as pa + from legate.core import Store from legate.core._legion.future import Future + +from .cffi import OpCode from .library import user_context as context -import numpy as np @dataclass @@ -40,7 +44,8 @@ def __array_interface__(self): "strides": self.strides, } -def array_to_store(array: np.ndarray) -> Store: + +def as_store(array: np.ndarray) -> Store: store = context.create_store( pa.from_numpy_dtype(array.dtype), shape=array.shape, @@ -54,7 +59,7 @@ def array_to_store(array: np.ndarray) -> Store: return store -def store_to_array(store: Store) -> np.ndarray: +def as_array(store: Store) -> np.ndarray: if store.kind is Future: dtype = store.get_dtype() buf = store.storage.get_buffer(dtype.size) @@ -74,3 +79,15 @@ def construct_ndarray(shape, address, strides): result = alloc.consume(construct_ndarray) return result + + +def convert(input: Store, dtype: pa.DataType) -> Store: + dtype = context.type_system[dtype] + result = context.create_store(dtype, input.shape) + task = context.create_auto_task(OpCode.CONVERT) + task.add_input(input) + task.add_output(result) + task.add_alignment(input, result) + task.execute() + + return result diff --git a/legate/raft/knn.py b/legate/raft/knn.py index 11cdbf8647..aaac35b1c9 100644 --- a/legate/raft/knn.py +++ b/legate/raft/knn.py @@ -24,7 +24,7 @@ def run_knn(index: np.ndarray, search: np.ndarray, n_neighbors: int, - metric : str ='l2'): + metric : str = 'l2'): index_batch_size = 512 query_batch_size = 8 n_features = index.shape[1] @@ -44,8 +44,10 @@ def run_knn(index: np.ndarray, distances_buffer_array = np.zeros((buffer_size, n_neighbors), dtype=np.float32) indices_buffer_store = array_to_store(indices_buffer_array) distances_buffer_store = array_to_store(distances_buffer_array) - indices_buffer_store = indices_buffer_store.partition_by_tiling((query_batch_size, n_neighbors)) - distances_buffer_store = distances_buffer_store.partition_by_tiling((query_batch_size, n_neighbors)) + indices_buffer_store = \ + 
# --- legate/raft/multiarray.py -------------------------------------------

import legate.core.types as ty
from legate.core import Store
from legate.raft.array_api import fill
from legate.raft.cffi import OpCode
from legate.raft.library import user_context as context


def multiply(rhs1: Store, rhs2: Store) -> Store:
    """Element-wise product of two stores with identical type and shape."""
    if rhs1.type != rhs2.type or rhs1.shape != rhs2.shape:
        # BUGFIX: message said "Stores to add" (copy-paste from add).
        raise ValueError("Stores to multiply must have the same type and shape")

    # NOTE(review): this uses rhs1.type.type where sibling functions use
    # input.type — confirm which create_store expects.
    result = context.create_store(rhs1.type.type, rhs1.shape)

    task = context.create_auto_task(OpCode.MUL)
    task.add_input(rhs1)
    task.add_input(rhs2)
    task.add_output(result)
    task.add_alignment(result, rhs1)
    task.add_alignment(result, rhs2)

    task.execute()

    return result


def matmul(rhs1: Store, rhs2: Store) -> Store:
    """
    Performs matrix multiplication

    Parameters
    ----------
    rhs1, rhs2 : Store
        Matrices to multiply

    Returns
    -------
    Store
        Multiplication result
    """
    if rhs1.ndim != 2 or rhs2.ndim != 2:
        raise ValueError("Stores must be 2D")
    if rhs1.type != rhs2.type:
        raise ValueError("Stores must have the same type")
    if rhs1.shape[1] != rhs2.shape[0]:
        # BUGFIX: fixed typo "mulplication" and the second shape, which
        # previously repeated rhs1.shape instead of showing rhs2.shape.
        raise ValueError(
            "Can't do matrix multiplication between arrays of "
            f"shapes {rhs1.shape} and {rhs2.shape}"
        )

    m = rhs1.shape[0]
    k = rhs1.shape[1]
    n = rhs2.shape[1]

    # Multiplying an (m, k) matrix with a (k, n) matrix gives
    # an (m, n) matrix
    result = fill((m, n), 0, dtype=rhs1.type)

    # Each store gets a fake dimension that it doesn't have, so all three
    # views share the common (m, k, n) iteration space.
    rhs1 = rhs1.promote(2, n)
    rhs2 = rhs2.promote(0, m)
    lhs = result.promote(1, k)

    assert lhs.shape == rhs1.shape
    assert lhs.shape == rhs2.shape

    task = context.create_auto_task(OpCode.MATMUL)
    task.add_input(rhs1)
    task.add_input(rhs2)
    task.add_reduction(lhs, ty.ReductionOp.ADD)
    task.add_alignment(lhs, rhs1)
    task.add_alignment(lhs, rhs2)

    task.execute()

    return result


def bincount(input: Store, num_bins: int) -> Store:
    """
    Counts the occurrences of each bin index

    Parameters
    ----------
    input : Store
        Input to bin-count
    num_bins : int
        Number of bins

    Returns
    -------
    Store
        Counting result
    """
    result = fill((num_bins,), 0, ty.uint64)

    task = context.create_auto_task(OpCode.BINCOUNT)
    task.add_input(input)
    # Broadcast the result store. This commands the Legate runtime to give
    # the entire store to every task instantiated by this task descriptor
    task.add_broadcast(result)
    # Declares that the tasks will do reductions to the result store and
    # that outputs from the tasks should be combined by addition
    task.add_reduction(result, ty.ReductionOp.ADD)

    task.execute()

    return result


def categorize(input: Store, bins: Store) -> Store:
    """Map each element of ``input`` to the index of its bin in ``bins``."""
    result = context.create_store(ty.uint64, input.shape)

    task = context.create_auto_task(OpCode.CATEGORIZE)
    task.add_input(input)
    task.add_input(bins)
    task.add_output(result)

    # Broadcast the store that contains bin edges. Each task will get a copy
    # of the entire bin edges
    task.add_broadcast(bins)

    task.execute()

    return result


def histogram(input: Store, bins: Store) -> Store:
    """
    Constructs a histogram for the given bins

    Parameters
    ----------
    input : Store
        Input
    bins : Store
        Bin edges (num_bins + 1 values)

    Returns
    -------
    Store
        Histogram
    """
    num_bins = bins.shape[0] - 1
    result = fill((num_bins,), 0, ty.uint64)

    task = context.create_auto_task(OpCode.HISTOGRAM)
    task.add_input(input)
    task.add_input(bins)
    task.add_reduction(result, ty.ReductionOp.ADD)

    # Broadcast both the result store and the one that contains bin edges.
    task.add_broadcast(bins)
    task.add_broadcast(result)

    task.execute()

    return result


# --- legate/raft/special.py ----------------------------------------------

from legate.core import Store

from .array_api import add, exp, log
from .array_api import max as lg_max
from .array_api import subtract, sum_over_axis


def logsumexp(x: Store, axis: int) -> Store:
    """Numerically-stable log-sum-exp reduction of ``x`` along ``axis``.

    Implements, via the store primitives:
        c = x.max(axis); c + log(sum(exp(x - c), axis))

    NOTE(review): the transpose pair assumes ``x`` is 2-D — confirm before
    using with other ranks.
    """
    x_max = lg_max(x, axis=axis)
    tmp0 = subtract(x.transpose((1, 0)), x_max).transpose((1, 0))
    tmp = exp(tmp0)
    s = sum_over_axis(tmp, axis=axis)
    out = log(s)
    ret = add(out, x_max)
    return ret
note(f"incompatible {a} {b}") + with pytest.raises(ValueError): + np.ones(a) + np.ones(b) + else: + note(f"compatible: {a} {b} -> {broadcasted_shape}") + assert broadcasted_shape == (np.ones(a) + np.ones(b)).shape + + +@st.composite +def promotable_shapes(draw): + to_shape = draw(array_shapes(min_dims=2)) + + num_dims_to_remove = draw(st.integers(min_value=1, max_value=len(to_shape) - 1)) + front = draw(st.booleans()) + if front: + from_shape = tuple(to_shape[num_dims_to_remove:]) + else: + from_shape = tuple(to_shape[: len(to_shape) - num_dims_to_remove]) + + try: + assume(broadcast_shape(from_shape, to_shape) == to_shape) + except ValueError: + assume(False) + + return from_shape, to_shape + + +@example(((2,), (1, 2, 3))) +@given(shapes=promotable_shapes()) +@settings(max_examples=1000) +def test_promote(shapes): + from_shape, to_shape = shapes + + try: + broadcast_shape(from_shape, to_shape) + except ValueError: + pass + else: + promoted = list(from_shape) + promote(from_shape, to_shape, lambda dim, size: promoted.insert(dim, size)) + assert tuple(promoted) == to_shape diff --git a/legate/raft/util.py b/legate/raft/util.py new file mode 100644 index 0000000000..b6ebb17dc8 --- /dev/null +++ b/legate/raft/util.py @@ -0,0 +1,70 @@ +# Copyright 2023 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +def broadcast_shape(A, B): + # Add additional dimensions as needed. 
+ max_dim = max(len(A), len(B)) + A_ = tuple(A[::-1][i] if i < len(A) else B[::-1][i] for i in range(max_dim)) + B_ = tuple(B[::-1][i] if i < len(B) else A[::-1][i] for i in range(max_dim)) + assert len(A_) == len(B_) == max_dim + A_, B_ = tuple(A_), tuple(B_) + + # Stretch existing dimensions as needed. + def stretch(): + for a, b in zip(A_, B_): + if a != b: + if 1 in (a, b): + yield max(a, b) + else: + raise ValueError(f"Unable to broadcast together {A} and {B}.") + else: + yield a + + ret = tuple(stretch()) + assert len(ret) == max_dim + return tuple(reversed(ret)) + + +def _find_sequence(sequence, sub_sequence): + assert len(sub_sequence) <= len(sequence) + + n = len(sub_sequence) + + for i in range(len(sequence) - n + 1): + if sequence[i : i + n] == sub_sequence: + return i + raise IndexError + + +def promote(from_shape, to_shape, promote_func): + try: + assert len(to_shape) > len(from_shape) + bs = broadcast_shape(from_shape, to_shape) + for a, b in zip(to_shape, bs): + assert a == b + except AssertionError: + raise ValueError(f"Unable to promote {from_shape} to {to_shape}") + + try: + idx_start = _find_sequence(to_shape, from_shape) + idx_stop = idx_start + len(from_shape) + except IndexError: + raise ValueError(f"Unable to promote {from_shape} to {to_shape}") + + for i in range(idx_start): + promote_func(i, to_shape[i]) + for i in range(idx_stop, len(to_shape)): + promote_func(i, to_shape[i]) diff --git a/naive_bayes/__init__.py b/naive_bayes/__init__.py new file mode 100644 index 0000000000..a4cc9240b7 --- /dev/null +++ b/naive_bayes/__init__.py @@ -0,0 +1,5 @@ +from .multinomial import MultinomialNB + +__all__ = [ + "MultinomialNB", +] diff --git a/naive_bayes/cn/__init__.py b/naive_bayes/cn/__init__.py new file mode 100644 index 0000000000..2e0b3eab48 --- /dev/null +++ b/naive_bayes/cn/__init__.py @@ -0,0 +1,18 @@ +# Copyright 2023 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file 
# --- naive_bayes/cn/multinomial.py ---------------------------------------
# Part of this code was adapted from sklearn/naive_bayes.py which
# is distributed under the following copyright:
# Copyright (c) 2007–2020 The scikit-learn developers.
# and licensed under the New BSD License (see LICENSE.scikit_learn).

# import numpy as np
import cunumeric as np
from scipy.special import logsumexp
from sklearn.preprocessing import LabelBinarizer
from sklearn.utils.extmath import safe_sparse_dot


def sparse_dot(a, b):
    """Matrix product of ``a`` and ``b``, returned with ``a``'s dtype."""
    # TODO: Actually implement handling of sparse matrices.
    # cunumeric does not support integer types for this operation,
    # so compute in float64 and cast the product back.
    # TODO: Handle a.dtype != b.dtype
    lhs = np.asarray(a, dtype=np.float64)
    rhs = np.asarray(b, dtype=np.float64)
    return np.asarray(safe_sparse_dot(lhs, rhs), dtype=a.dtype)


def sum_(a, axis=None):
    """Thin wrapper around np.sum so the backend can be swapped in one place."""
    return np.sum(a, axis=axis)


class MultinomialNB:
    """Multinomial naive Bayes on cunumeric arrays (adapted from sklearn)."""

    def __init__(self, alpha: float = 1.0, fit_prior=True, class_prior=None):
        self.alpha = alpha
        # NOTE(review): fit_prior is deliberately pinned to False — the
        # fitted-prior branch raises NotImplementedError below. Confirm intent.
        self.fit_prior = False
        self.class_prior = class_prior

    def fit(self, X, y):
        """Fit on dense ``X`` and labels ``y``; returns ``self``."""
        X = np.asarray(X)
        y = np.asarray(y)
        _, n_features = X.shape
        self.n_features_in_ = n_features

        binarizer = LabelBinarizer()
        # Convert the binarized output back to a cunumeric array.
        Y = np.asarray(binarizer.fit_transform(y))
        self.classes_ = binarizer.classes_
        assert Y.shape[1] != 1

        n_classes = Y.shape[1]
        self._init_counters(n_classes, n_features)
        self._count(X, Y)
        self._update_feature_log_proba(self.alpha)
        self._update_class_log_prior(class_prior=self.class_prior)

        return self

    def _count(self, X, Y):
        # Accumulate per-class feature totals and per-class sample counts.
        self.feature_count_ += sparse_dot(Y.T, X)
        self.class_count_ += sum_(Y, axis=0)

    def _init_counters(self, n_classes, n_features):
        # Float counters so smoothing/log arithmetic stays in float64.
        self.class_count_ = np.zeros(n_classes, dtype=np.float64)
        self.feature_count_ = np.zeros((n_classes, n_features), dtype=np.float64)

    def _update_feature_log_proba(self, alpha):
        # Adapted from sklearn/naive_bayes.py: Laplace-smoothed
        # log P(feature | class).
        counts = self.feature_count_ + alpha
        totals = sum_(counts, axis=1)
        self.feature_log_prob_ = np.log(counts) - np.log(totals.reshape(-1, 1))

    def _update_class_log_prior(self, class_prior=None):
        n_classes = len(self.classes_)
        if class_prior is not None:
            assert len(class_prior) == n_classes
            self.class_log_prior_ = np.log(class_prior)
        elif self.fit_prior:
            raise NotImplementedError
        else:
            # Uniform prior: log(1 / n_classes) for every class.
            self.class_log_prior_ = np.full(n_classes, -np.log(n_classes))

    def _joint_log_likelihood(self, X):
        # log P(x, c) = x · log P(f|c) + log P(c)
        return sparse_dot(X, self.feature_log_prob_.T) + self.class_log_prior_

    def predict_log_proba(self, X):
        """Per-class log probabilities for each row of ``X``."""
        X = np.asarray(X)
        jll = self._joint_log_likelihood(X)
        # normalize by P(x) = P(f_1, ..., f_n)
        log_prob_x = logsumexp(jll, axis=1)
        return jll - np.atleast_2d(log_prob_x).T

    def predict_proba(self, X):
        """Per-class probabilities for each row of ``X``."""
        X = np.asarray(X)
        return np.exp(self.predict_log_proba(X))

    def predict(self, X):
        """Most likely class label for each row of ``X``."""
        X = np.asarray(X)
        jll = self._joint_log_likelihood(X)
        return self.classes_[np.argmax(jll, axis=1)]


# --- naive_bayes/conftest.py ---------------------------------------------

import numpy as np
import pytest
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer


def _nlp_20news():
    """Fetch and count-vectorize the 20-newsgroups training split."""
    try:
        twenty_train = fetch_20newsgroups(subset="train", shuffle=True, random_state=42)
    except:  # noqa E722
        pytest.xfail(reason="Error fetching 20 newsgroup dataset")

    count_vect = CountVectorizer()
    X = count_vect.fit_transform(twenty_train.data)
    Y = np.array(twenty_train.target)

    return X, Y


@pytest.fixture(scope="module")
def nlp_20news():
    return _nlp_20news()
+# + + +from dataclasses import dataclass +from typing import Any + +import cunumeric as num +import pyarrow as pa + +from legate.core import Array, Store + + +@dataclass +class _StoreWrapper: + store: Store + + @property + def __legate_data_interface__(self) -> dict[str, Any]: + """ + Constructs a Legate data interface object from a store wrapped in this + object + """ + dtype = self.store.type.type + array = Array(dtype, [None, self.store]) + + # Create a field metadata to populate the data field + field = pa.field("Array", dtype, nullable=False) + + return { + "version": 1, + "data": {field: array}, + } + + +def to_cunumeric_array(store: Store) -> num.ndarray: + return num.asarray(_StoreWrapper(store)) diff --git a/naive_bayes/multinomial.py b/naive_bayes/multinomial.py new file mode 100644 index 0000000000..5a9fcde3d3 --- /dev/null +++ b/naive_bayes/multinomial.py @@ -0,0 +1,120 @@ +# Part of this code was adapted from sklearn/naive_bayes.py which +# is distributed under the following copyright: +# Copyright (c) 2007–2020 The scikit-learn developers. +# and licensed under the New BSD License (see LICENSE.scikit_learn). 
# --- naive_bayes/multinomial.py ------------------------------------------
# Part of this code was adapted from sklearn/naive_bayes.py which
# is distributed under the following copyright:
# Copyright (c) 2007–2020 The scikit-learn developers.
# and licensed under the New BSD License (see LICENSE.scikit_learn).

import numpy as np
import pyarrow as pa

# import cunumeric as np
from sklearn.preprocessing import LabelBinarizer

from legate.raft import (
    add,
    as_array,
    as_store,
    convert,
    exp,
    fill,
    log,
    matmul,
    subtract,
    sum_over_axis,
)
from legate.raft.special import logsumexp


class MultinomialNB:
    """Multinomial naive Bayes backed by legate.raft store primitives."""

    def __init__(self, alpha: float = 1.0, fit_prior=True, class_prior=None):
        self.alpha = alpha
        # NOTE(review): fit_prior is deliberately pinned to False — the
        # fitted-prior branch raises NotImplementedError. Confirm intent.
        self.fit_prior = False
        self.class_prior = class_prior
        self._feature_log_prob_ = None
        self._class_log_prior_ = None

    def fit(self, X, y):
        """Fit on dense ``X`` and labels ``y``; returns ``self``."""
        # Convert to legate stores
        X = as_store(X)
        _, n_features = X.shape
        self.n_features_in_ = n_features

        labelbin = LabelBinarizer()
        Y = as_store(labelbin.fit_transform(y))

        self.classes_ = labelbin.classes_
        assert Y.shape[1] != 1

        # feature_count_[c, f] = sum of feature f over samples of class c;
        # class_count_[c] = number of samples of class c.
        Y_transposed = Y.transpose((1, 0))
        self.feature_count_ = matmul(Y_transposed, X)
        self.class_count_ = sum_over_axis(Y, axis=0)
        self._update_feature_log_proba(self.alpha)
        self._update_class_log_prior(class_prior=self.class_prior)

        return self

    def _update_feature_log_proba(self, alpha, dtype=pa.float64()):
        # Adapted from sklearn/naive_bayes.py: Laplace-smoothed
        # log P(feature | class), computed with store primitives.

        fc_ = convert(self.feature_count_, dtype)
        smoothed_fc = add(fc_, alpha)
        smoothed_cc = sum_over_axis(smoothed_fc, axis=1)

        # Equivalent numpy:
        #   self.feature_log_prob_ = np.log(smoothed_fc) - np.log(
        #       smoothed_cc.reshape(-1, 1)
        #   )
        x1 = log(smoothed_fc)
        x2 = log(smoothed_cc)
        self._feature_log_prob_ = subtract(x1.transpose((1, 0)), x2).transpose((1, 0))

    @property
    def feature_log_prob_(self):
        # Materialized numpy view of the fitted log probabilities
        # (None before fit()).
        if self._feature_log_prob_ is not None:
            return as_array(self._feature_log_prob_)

    @property
    def class_log_prior_(self):
        # Materialized numpy view of the class log prior (None before fit()).
        if self._class_log_prior_ is not None:
            return as_array(self._class_log_prior_)

    def _update_class_log_prior(self, class_prior=None):
        n_classes = len(self.classes_)
        if class_prior is not None:
            assert len(class_prior) == n_classes
            # BUGFIX: ``log`` operates on legate Stores; a raw sequence
            # prior must be converted to a store first (previously
            # class_prior was passed straight through and this path
            # could not work).
            prior_store = as_store(np.asarray(class_prior, dtype=np.float64))
            self._class_log_prior_ = log(prior_store)
        elif self.fit_prior:
            raise NotImplementedError
        else:
            # Uniform prior: log(1 / n_classes).
            # BUGFIX: fill expects a shape, not a bare int.
            self._class_log_prior_ = fill((n_classes,), -np.log(n_classes))

    def _joint_log_likelihood(self, X):
        # log P(x, c) = x · log P(f|c) + log P(c), all as stores.
        X_converted = convert(X, pa.float64())

        x1 = matmul(X_converted, self._feature_log_prob_.transpose((1, 0)))
        x2 = self._class_log_prior_
        return add(x1, x2)

    def _predict_log_proba(self, X):
        jll = self._joint_log_likelihood(X)
        # normalize by P(x) = P(f_1, ..., f_n)
        log_prob_x = logsumexp(jll, axis=1)

        x1 = jll
        x2 = log_prob_x
        return subtract(x1.transpose((1, 0)), x2).transpose((1, 0))

    def predict_log_proba(self, X):
        """Per-class log probabilities for each row of ``X`` (numpy)."""
        X = as_store(X)
        ret = self._predict_log_proba(X)
        return as_array(ret)

    def predict_proba(self, X):
        """Per-class probabilities for each row of ``X`` (numpy)."""
        X = as_store(X)
        ret = exp(self._predict_log_proba(X))
        return as_array(ret)

    def predict(self, X):
        """Most likely class label for each row of ``X``."""
        X = as_store(X)
        jll = as_array(self._joint_log_likelihood(X))
        ret = self.classes_[np.argmax(jll, axis=1)]
        return ret


# --- naive_bayes/tests.py ------------------------------------------------

# import numpy as np
import cunumeric as np
from numpy.testing import assert_allclose
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import MultinomialNB as skNB

from naive_bayes import MultinomialNB
from naive_bayes.cn.multinomial import MultinomialNB as CNMultinomialNB


def test_multinomial(nlp_20news):
    """Compare the legate and cunumeric models against sklearn's."""
    X_sparse, y = nlp_20news
    n_rows = 500
    n_cols = 10000

    X = X_sparse[:n_rows, :n_cols]
    # TODO: Implement support for sparse arrays.
    X = np.ascontiguousarray(X.todense())
    y = y[:n_rows]

    legate_model = MultinomialNB()
    cn_legate_model = CNMultinomialNB()
    sk_model = skNB()

    sk_model.fit(X, y)
    cn_legate_model.fit(X, y)
    legate_model.fit(X, y)

    sk_log_proba = sk_model.predict_log_proba(X)
    legate_log_proba = legate_model.predict_log_proba(X)
    cn_legate_log_proba = cn_legate_model.predict_log_proba(X)
    sk_proba = sk_model.predict_proba(X)
    legate_proba = legate_model.predict_proba(X)
    cn_legate_proba = cn_legate_model.predict_proba(X)
    # sk_score = sk_model.score(X, y)
    # legate_score = legate_model.score(X, y)

    y_sk = sk_model.predict(X)
    y_legate = legate_model.predict(X)
    y_cn_legate = cn_legate_model.predict(X)

    assert_allclose(cn_legate_log_proba, sk_log_proba, atol=5e-1, rtol=5e-1)
    assert_allclose(cn_legate_proba, sk_proba, atol=2e-1, rtol=2.5)
    assert_allclose(legate_log_proba, sk_log_proba, atol=5e-1, rtol=5e-1)
    assert_allclose(legate_proba, sk_proba, atol=2e-1, rtol=2.5)
    assert accuracy_score(y, y_sk) >= 0.45
    assert accuracy_score(y, y_cn_legate) >= 0.45
    assert accuracy_score(y, y_legate) >= 0.45
    print("PASS")


if __name__ == "__main__":
    from conftest import _nlp_20news

    test_multinomial(_nlp_20news())
a/setup.cfg +++ b/setup.cfg @@ -6,42 +6,11 @@ style = pep440 tag_prefix = v [flake8] -exclude = __init__.py -ignore = - # line break before binary operator - W503 - # whitespace before : - E203 - # undefined, or defined from star imports - F405 +max-line-length = 88 +extend-ignore = E203 [isort] -line_length=79 -multi_line_output=3 -include_trailing_comma=True -force_grid_wrap=0 -combine_as_imports=True -order_by_type=True -known_third_party= - numpy -known_legion= - legion_cffi - legion_top -known_first_party= - legate.raft -default_section=THIRDPARTY -sections=FUTURE,STDLIB,THIRDPARTY,LEGION,FIRSTPARTY,LOCALFOLDER -skip= - .eggs - .git - .mypy_cache - .tox - .venv - _build - build - dist - legion - __init__.py +profile = black [options] packages = find: