diff --git a/.gitignore b/.gitignore
index 74028d08c4..6f34017d0a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,13 @@
-build/
+.eggs/
 .idea
-
+__pycache__/
+_skbuild/
+build/
+cpp/src/legate_library.cc
+cpp/src/legate_library.h
+dist/
+legate.raft.egg-info/
+legate/raft/__pycache__/
+legate/raft/install_info.py
+legate/raft/library.py
+pytest/__pycache__
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000000..c5ae9108f1
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,30 @@
+---
+# Copyright (c) 2023, NVIDIA CORPORATION.
+
+repos:
+  - repo: https://github.com/psf/black
+    rev: 23.1.0
+    hooks:
+      - id: black
+        exclude: 'legate/raft/library\.py$'
+  - repo: https://github.com/PyCQA/flake8
+    rev: 6.0.0
+    hooks:
+      - id: flake8
+        exclude: 'legate/raft/library\.py$'
+  - repo: https://github.com/codespell-project/codespell
+    rev: v2.2.4
+    hooks:
+      - id: codespell
+        additional_dependencies: [tomli]
+  - repo: https://github.com/pycqa/isort
+    rev: 5.12.0
+    hooks:
+      - id: isort
+  - repo: https://github.com/kynan/nbstripout
+    rev: 0.6.1
+    hooks:
+      - id: nbstripout
+
+default_language_version:
+  python: python3
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000000..1c36794100
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,46 @@
+# =============================================================================
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+
+cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR)
+
+# ------------- configure rapids-cmake --------------#
+
+include(cmake/thirdparty/fetch_rapids.cmake)
+include(rapids-cmake)
+include(rapids-cpm)
+include(rapids-cuda)
+include(rapids-export)
+include(rapids-find)
+
+# ------------- configure project -------------- #
+
+rapids_cuda_init_architectures(legate_raft)
+project(legate_raft LANGUAGES C CXX CUDA)
+
+# ------------- configure raft ----------------- #
+
+rapids_cpm_init()
+include(cmake/thirdparty/get_raft.cmake)
+
+# -------------- add requirements -------------- #
+
+find_package(legate_core REQUIRED)
+set(BUILD_SHARED_LIBS ON)
+
+# -------------- compile tasks ----------------- #
+
+# C++ layer
+legate_add_cpp_subdirectory(cpp/src TARGET legate_raft EXPORT legate_raft-export)
+
+# Python layer
+add_subdirectory(legate)
diff --git a/build.sh b/build.sh
index ee1efa9d57..fc55afffa9 100755
--- a/build.sh
+++ b/build.sh
@@ -7,6 +7,8 @@
 # Abort script on first error
 set -e
 
+INSTALL_PREFIX=${INSTALL_PREFIX:=${PREFIX:=${CONDA_PREFIX}}}
+
 PARALLEL_LEVEL=${PARALLEL_LEVEL:=`nproc`}
 
 BUILD_TYPE=Release
@@ -24,6 +26,13 @@ fi
 
 if [ "$1" == "clean" ]; then
   rm -rf cpp/build
+  rm -rf dist legate.raft.egg-info
+  rm cpp/src/legate_library.cc
+  rm cpp/src/legate_library.h
+  python setup.py clean --all
+  rm legate/raft/install_info.py
+  rm legate/raft/library.py
+  rm -rf pytest/__pycache__
   exit 0
 fi
 
@@ -35,7 +44,12 @@ cmake \
   -DRAFT_NVTX=OFF \
   -DCMAKE_CUDA_ARCHITECTURES="NATIVE" \
   -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
+  -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \
   ${EXTRA_CMAKE_ARGS} \
-  ../
+  ../../
+
+cmake --build . -j${PARALLEL_LEVEL}
+cmake --install . --prefix ${INSTALL_PREFIX}
 
-cmake --build . -j${PARALLEL_LEVEL}
+cd ../..
+python setup.py install
diff --git a/cmake/thirdparty/fetch_rapids.cmake b/cmake/thirdparty/fetch_rapids.cmake
new file mode 100644
index 0000000000..40ba83be9e
--- /dev/null
+++ b/cmake/thirdparty/fetch_rapids.cmake
@@ -0,0 +1,21 @@
+# =============================================================================
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+
+# Use this variable to update RAPIDS and RAFT versions
+set(RAPIDS_VERSION "23.04")
+
+if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake)
+  file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${RAPIDS_VERSION}/RAPIDS.cmake
+       ${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake)
+endif()
+include(${CMAKE_CURRENT_BINARY_DIR}/RAFT_RAPIDS.cmake)
diff --git a/cmake/thirdparty/get_raft.cmake b/cmake/thirdparty/get_raft.cmake
new file mode 100644
index 0000000000..5463942adf
--- /dev/null
+++ b/cmake/thirdparty/get_raft.cmake
@@ -0,0 +1,62 @@
+# =============================================================================
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+
+# Use RAPIDS_VERSION from cmake/thirdparty/fetch_rapids.cmake
+set(RAFT_VERSION "${RAPIDS_VERSION}")
+set(RAFT_FORK "rapidsai")
+set(RAFT_PINNED_TAG "branch-${RAPIDS_VERSION}")
+
+function(find_and_configure_raft)
+  set(oneValueArgs VERSION FORK PINNED_TAG COMPILE_LIBRARY ENABLE_NVTX ENABLE_MNMG_DEPENDENCIES)
+  cmake_parse_arguments(PKG "${options}" "${oneValueArgs}"
+                        "${multiValueArgs}" ${ARGN})
+
+  set(RAFT_COMPONENTS "")
+  if(PKG_COMPILE_LIBRARY)
+    string(APPEND RAFT_COMPONENTS " compiled")
+  endif()
+
+  if(PKG_ENABLE_MNMG_DEPENDENCIES)
+    string(APPEND RAFT_COMPONENTS " distributed")
+  endif()
+
+  #-----------------------------------------------------
+  # Invoke CPM find_package()
+  #-----------------------------------------------------
+  rapids_cpm_find(raft ${PKG_VERSION}
+    GLOBAL_TARGETS raft::raft
+    BUILD_EXPORT_SET raft-template-exports
+    INSTALL_EXPORT_SET raft-template-exports
+    COMPONENTS ${RAFT_COMPONENTS}
+    CPM_ARGS
+      GIT_REPOSITORY https://github.com/${PKG_FORK}/raft.git
+      GIT_TAG ${PKG_PINNED_TAG}
+      SOURCE_SUBDIR cpp
+      OPTIONS
+        "BUILD_TESTS OFF"
+        "BUILD_BENCH OFF"
+        "RAFT_NVTX ${PKG_ENABLE_NVTX}"
+        "RAFT_COMPILE_LIBRARY ${PKG_COMPILE_LIBRARY}"
+  )
+endfunction()
+
+# Change pinned tag here to test a commit in CI
+# To use a different RAFT locally, set the CMake variable
+# CPM_raft_SOURCE=/path/to/local/raft
+find_and_configure_raft(VERSION ${RAFT_VERSION}.00
+  FORK ${RAFT_FORK}
+  PINNED_TAG ${RAFT_PINNED_TAG}
+  COMPILE_LIBRARY ON
+  ENABLE_MNMG_DEPENDENCIES OFF
+  ENABLE_NVTX OFF
+)
diff --git a/cpp/legate_raft/install_info.py b/cpp/legate_raft/install_info.py
deleted file mode 100644
index 85aa119d8f..0000000000
--- a/cpp/legate_raft/install_info.py
+++ /dev/null
@@ -1,42 +0,0 @@
-from pathlib import Path
-
-def get_libpath():
-    import os, sys, platform
-    join = os.path.join
-    exists = os.path.exists
-    dirname = os.path.dirname
-    cn_path = dirname(dirname(__file__))
-    so_ext = {
-        "": "",
-        "Java": ".jar",
-        "Linux": ".so",
-        "Darwin": ".dylib",
-        "Windows": ".dll"
-    }[platform.system()]
-
-    def find_lib(libdir):
-        target = f"liblegate_raft{so_ext}*"
-        search_path = Path(libdir)
-        matches = [m for m in search_path.rglob(target)]
-        if matches:
-            return matches[0].parent
-        return None
-
-    return (
-        find_lib("/home/cjnolet/workspace/rapids_projects/legate.raft/cpp/build/legate_legate_raft") or
-        find_lib(join(dirname(dirname(dirname(cn_path))), "lib")) or
-        find_lib(join(dirname(dirname(sys.executable)), "lib")) or
-        ""
-    )
-
-libpath: str = get_libpath()
-
-header: str = """
-  enum LegateRaftOpCode {
-    _OP_CODE_BASE = 0,
-    FUSED_1NN = 1,
-    HISTOGRAM = 2
-};
-
-  void legate_raft_perform_registration();
-"""
diff --git a/cpp/src/CMakeLists.txt b/cpp/src/CMakeLists.txt
index 9e34206a4b..8918f3ca1f 100644
--- a/cpp/src/CMakeLists.txt
+++ b/cpp/src/CMakeLists.txt
@@ -1,16 +1,27 @@
-
 legate_cpp_library_template(legate_raft TEMPLATE_SOURCES)
 
 add_library(
   legate_raft
 
-  # RAFT API wrappers
+  # RAFT API Wrappers
   raft/histogram.cu
   raft/distance.cu
 
   # Legate tasks
-  task/compute_1nn.cc
+  task/add.cc
+  # task/compute_1nn.cc
+  task/add_constant.cc
+  task/bincount.cc
+  task/categorize.cc
+  task/convert.cc
+  task/exp.cc
+  task/fill.cc
+  task/find_max.cc
   task/histogram.cc
+  task/log.cc
+  task/matmul.cc
+  task/mul.cc
+  task/sum_over_axis.cc
 
   # Library templates
   ${TEMPLATE_SOURCES}
@@ -23,4 +34,117 @@ target_include_directories(legate_raft
   $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
 )
 
-target_link_libraries(legate_raft PRIVATE legate::core raft::raft raft::compiled)
\ No newline at end of file
+target_link_libraries(legate_raft PRIVATE legate::core raft::raft raft::compiled)
+
+
+install(TARGETS legate_raft
+        DESTINATION ${CMAKE_INSTALL_PREFIX}/lib
+)
+
+
+function(legate_add_cffi header output_dir)
+  if (NOT DEFINED CMAKE_C_COMPILER)
+    message(FATAL_ERROR "Must enable C language to build Legate projects")
+  endif()
+
+  set(options)
+  set(one_value_args TARGET)
+  set(multi_value_args)
+  cmake_parse_arguments(
+    LEGATE_OPT
+    "${options}"
+    "${one_value_args}"
+    "${multi_value_args}"
+    ${ARGN}
+  )
+  # abbreviate for the function below
+  set(target ${LEGATE_OPT_TARGET})
+  set(install_info_in
+[=[
+from pathlib import Path
+
+def get_libpath():
+    import os, sys, platform
+    join = os.path.join
+    exists = os.path.exists
+    dirname = os.path.dirname
+    cn_path = dirname(dirname(__file__))
+    so_ext = {
+        "": "",
+        "Java": ".jar",
+        "Linux": ".so",
+        "Darwin": ".dylib",
+        "Windows": ".dll"
+    }[platform.system()]
+
+    def find_lib(libdir):
+        target = f"lib@target@{so_ext}*"
+        search_path = Path(libdir)
+        matches = [m for m in search_path.rglob(target)]
+        if matches:
+            return matches[0].parent
+        return None
+
+    return (
+        find_lib("@libdir@") or
+        find_lib(join(dirname(dirname(dirname(cn_path))), "lib")) or
+        find_lib(join(dirname(dirname(sys.executable)), "lib")) or
+        ""
+    )
+
+libpath: str = get_libpath()
+
+header: str = """
+  @header@
+  void @target@_perform_registration();
+"""
+]=])
+  set(install_info_py_in ${CMAKE_BINARY_DIR}/legate_${target}/install_info.py.in)
+  set(install_info_py ${CMAKE_SOURCE_DIR}/${output_dir}/install_info.py)
+  file(WRITE ${install_info_py_in} "${install_info_in}")
+
+  set(generate_script_content
+  [=[
+    execute_process(
+      COMMAND ${CMAKE_C_COMPILER}
+        -E
+        -P @header@
+      ECHO_ERROR_VARIABLE
+      OUTPUT_VARIABLE header
+      COMMAND_ERROR_IS_FATAL ANY
+    )
+    configure_file(
+        @install_info_py_in@
+        @install_info_py@
+    @ONLY)
+  ]=])
+
+  set(generate_script ${CMAKE_BINARY_DIR}/gen_install_info.cmake)
+  file(CONFIGURE
+       OUTPUT ${generate_script}
+       CONTENT "${generate_script_content}"
+       @ONLY
+  )
+
+  if (DEFINED ${target}_BUILD_LIBDIR)
+    # this must have been imported from an existing editable build
+    set(libdir ${${target}_BUILD_LIBDIR})
+  else()
+    # libraries are built in a common spot
+    set(libdir ${CMAKE_BINARY_DIR}/legate_${target})
+    message("libdir to binary dir")
+  endif()
+
+  add_custom_target("${target}_generate_install_info_py" ALL
+    COMMAND ${CMAKE_COMMAND}
+      -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
+      -Dtarget=${target}
+      -Dlibdir=${libdir}
+      -P ${generate_script}
+    OUTPUT ${install_info_py}
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+    COMMENT "Generating install_info.py"
+    DEPENDS ${header}
+  )
+endfunction()
+
+legate_add_cffi(legate_raft_cffi.h legate/raft TARGET legate_raft)
diff --git a/cpp/src/legate_library.cc b/cpp/src/legate_library.cc
deleted file mode 100644
index 38cc9411ae..0000000000
--- a/cpp/src/legate_library.cc
+++ /dev/null
@@ -1,88 +0,0 @@
-#include "legate_library.h"
-#include "core/mapping/mapping.h"
-
-namespace legate_raft {
-
-class Mapper : public legate::mapping::LegateMapper {
- public:
-  Mapper() {}
-
- private:
-  Mapper(const Mapper& rhs) = delete;
-  Mapper& operator=(const Mapper& rhs) = delete;
-
-  // Legate mapping functions
- public:
-  void set_machine(const legate::mapping::MachineQueryInterface* machine) override
-  {
-    machine_ = machine;
-  }
-
-  legate::mapping::TaskTarget task_target(
-    const legate::mapping::Task& task,
-    const std::vector<legate::mapping::TaskTarget>& options) override
-  {
-    return *options.begin();
-  }
-
-  std::vector<legate::mapping::StoreMapping> store_mappings(
-    const legate::mapping::Task& task,
-    const std::vector<legate::mapping::StoreTarget>& options) override
-  {
-    using legate::mapping::StoreMapping;
-    std::vector<StoreMapping> mappings;
-    auto& inputs  = task.inputs();
-    auto& outputs = task.outputs();
-    for (auto& input : inputs) {
-      mappings.push_back(StoreMapping::default_mapping(input, options.front()));
-      mappings.back().policy.exact = true;
-    }
-    for (auto& output : outputs) {
-      mappings.push_back(StoreMapping::default_mapping(output, options.front()));
-      mappings.back().policy.exact = true;
-    }
-    return std::move(mappings);
-  }
-
-  legate::Scalar tunable_value(legate::TunableID tunable_id) override { return 0; }
-
- private:
-  const legate::mapping::MachineQueryInterface* machine_;
-};
-
-static const char* const library_name = "legate_raft";
-
-Legion::Logger log_legate_raft(library_name);
-
-/*static*/ legate::TaskRegistrar& Registry::get_registrar()
-{
-  static legate::TaskRegistrar registrar;
-  return registrar;
-}
-
-void registration_callback()
-{
-  legate::ResourceConfig config;
-  config.max_mappers       = 1;
-  config.max_tasks         = 1024;
-  config.max_reduction_ops = 8;
-  legate::LibraryContext context(library_name, config);
-
-  Registry::get_registrar().register_all_tasks(context);
-
-  // Now we can register our mapper with the runtime
-  context.register_mapper(std::make_unique<Mapper>(), 0);
-}
-
-}  // namespace legate_raft
-
-extern "C" {
-
-void legate_raft_perform_registration(void)
-{
-  // Tell the runtime about our registration callback so we hook it
-  // in before the runtime starts and make it global so that we know
-  // that this call back is invoked everywhere across all nodes
-  legate::Core::perform_registration<legate_raft::registration_callback>();
-}
-
-}
diff --git a/cpp/src/legate_raft_cffi.h b/cpp/src/legate_raft_cffi.h
index e32f09f031..5e84c268a7 100644
--- a/cpp/src/legate_raft_cffi.h
+++ b/cpp/src/legate_raft_cffi.h
@@ -1,5 +1,17 @@
 enum LegateRaftOpCode {
   _OP_CODE_BASE = 0,
-  FUSED_1NN = 1,
-  HISTOGRAM = 2
-};
\ No newline at end of file
+  ADD,
+  ADD_CONSTANT,
+  BINCOUNT,
+  CATEGORIZE,
+  CONVERT,
+  EXP,
+  FILL,
+  FIND_MAX,
+  FUSED_1NN,
+  HISTOGRAM,
+  LOG,
+  MATMUL,
+  MUL,
+  SUM_OVER_AXIS,
+};
diff --git a/cpp/src/task/add.cc b/cpp/src/task/add.cc
new file mode 100644
index 0000000000..c07eea1f06
--- /dev/null
+++ b/cpp/src/task/add.cc
@@ -0,0 +1,72 @@
+/* Copyright 2023 NVIDIA Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include <cstdint>
+
+#include "legate_library.h"
+#include "legate_raft_cffi.h"
+
+#include "core/utilities/dispatch.h"
+
+namespace legate_raft {
+
+namespace {
+
+struct add_fn {
+  template <legate::LegateTypeCode CODE, int32_t DIM>
+  void operator()(legate::Store& output, legate::Store& x1, legate::Store& x2)
+  {
+    using VAL = legate::legate_type_of<CODE>;
+
+    auto shape = x1.shape<DIM>();
+
+    if (shape.empty()) return;
+
+    auto x1_acc     = x1.read_accessor<VAL, DIM>();
+    auto x2_acc     = x2.read_accessor<VAL, DIM>();
+    auto output_acc = output.write_accessor<VAL, DIM>();
+
+    for (legate::PointInRectIterator<DIM> it(shape, false /*fortran order*/); it.valid(); ++it) {
+      auto p        = *it;
+      output_acc[p] = x1_acc[p] + x2_acc[p];
+    }
+  }
+};
+
+}  // namespace
+
+class AddTask : public Task<AddTask, ADD> {
+ public:
+  static void cpu_variant(legate::TaskContext& context)
+  {
+    auto& input1 = context.inputs()[0];
+    auto& input2 = context.inputs()[1];
+    auto& output = context.outputs()[0];
+
+    legate::double_dispatch(input1.dim(), input1.code(), add_fn{}, output, input1, input2);
+  }
+};
+
+}  // namespace legate_raft
+
+namespace {
+
+static void __attribute__((constructor)) register_tasks()
+{
+  legate_raft::AddTask::register_variants();
+}
+
+}  // namespace
diff --git a/cpp/src/task/add_constant.cc b/cpp/src/task/add_constant.cc
new file mode 100644
index 0000000000..5f4318a1a5
--- /dev/null
+++ b/cpp/src/task/add_constant.cc
@@ -0,0 +1,69 @@
+/* Copyright 2023 NVIDIA Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include "legate_library.h"
+#include "legate_raft_cffi.h"
+
+#include "core/utilities/dispatch.h"
+
+namespace legate_raft {
+
+namespace {
+
+struct add_constant_fn {
+  template <legate::LegateTypeCode CODE, int32_t DIM>
+  void operator()(legate::Store& output, legate::Store& input, legate::Scalar& value)
+  {
+    using VAL = legate::legate_type_of<CODE>;
+
+    auto shape = input.shape<DIM>();
+
+    if (shape.empty()) return;
+
+    auto input_acc  = input.read_accessor<VAL, DIM>();
+    auto output_acc = output.write_accessor<VAL, DIM>();
+
+    for (legate::PointInRectIterator<DIM> it(shape, false /*fortran order*/); it.valid(); ++it) {
+      auto p        = *it;
+      output_acc[p] = input_acc[p] + value.value<VAL>();
+    }
+  }
+};
+
+}  // namespace
+
+class AddConstantTask : public Task<AddConstantTask, ADD_CONSTANT> {
+ public:
+  static void cpu_variant(legate::TaskContext& context)
+  {
+    auto& input  = context.inputs()[0];
+    auto& value  = context.scalars()[0];
+    auto& output = context.outputs()[0];
+
+    legate::double_dispatch(input.dim(), input.code(), add_constant_fn{}, output, input, value);
+  }
+};
+
+}  // namespace legate_raft
+
+namespace {
+
+static void __attribute__((constructor)) register_tasks()
+{
+  legate_raft::AddConstantTask::register_variants();
+}
+
+}  // namespace
diff --git a/cpp/src/task/bincount.cc b/cpp/src/task/bincount.cc
new file mode 100644
index 0000000000..0426192274
--- /dev/null
+++ b/cpp/src/task/bincount.cc
@@ -0,0 +1,55 @@
+/* Copyright 2023 NVIDIA Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include "legate_library.h"
+#include "legate_raft_cffi.h"
+
+#include "core/utilities/dispatch.h"
+
+namespace legate_raft {
+
+class BincountTask : public Task<BincountTask, BINCOUNT> {
+ public:
+  static void cpu_variant(legate::TaskContext& context)
+  {
+    auto& input  = context.inputs()[0];
+    auto& output = context.reductions()[0];
+
+    auto in_shape  = input.shape<1>();
+    auto out_shape = output.shape<1>();
+
+    auto in_acc  = input.read_accessor<int64_t, 1>();
+    auto out_acc = output.reduce_accessor<legate::SumReduction<uint64_t>, true, 1>();
+
+    for (legate::PointInRectIterator<1> it(in_shape); it.valid(); ++it) {
+      auto& value = in_acc[*it];
+      legate::Point<1> pos_reduce(static_cast<int64_t>(value));
+
+      if (out_shape.contains(pos_reduce)) out_acc.reduce(pos_reduce, 1);
+    }
+  }
+};
+
+}  // namespace legate_raft
+
+namespace {
+
+static void __attribute__((constructor)) register_tasks()
+{
+  legate_raft::BincountTask::register_variants();
+}
+
+}  // namespace
diff --git a/cpp/src/task/categorize.cc b/cpp/src/task/categorize.cc
new file mode 100644
index 0000000000..ab188befed
--- /dev/null
+++ b/cpp/src/task/categorize.cc
@@ -0,0 +1,89 @@
+/* Copyright 2023 NVIDIA Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include "legate_library.h"
+#include "legate_raft_cffi.h"
+
+#include "core/utilities/dispatch.h"
+#include "core/utilities/type_traits.h"
+
+namespace legate_raft {
+
+namespace {
+
+struct categorize_fn {
+  template <legate::LegateTypeCode CODE,
+            std::enable_if_t<legate::is_integral<CODE>::value>* = nullptr>
+  void operator()(legate::Store& result, legate::Store& input, legate::Store& bins)
+  {
+    using VAL = legate::legate_type_of<CODE>;
+
+    auto in_shape  = result.shape<1>();
+    auto bin_shape = bins.shape<1>();
+
+    assert(!bin_shape.empty());
+    if (in_shape.empty()) return;
+
+    auto num_bins = bin_shape.hi[0] - bin_shape.lo[0];
+
+    auto in_acc  = input.read_accessor<VAL, 1>();
+    auto bin_acc = bins.read_accessor<VAL, 1>();
+    auto res_acc = result.write_accessor<uint64_t, 1>();
+
+    for (legate::PointInRectIterator<1> it(in_shape); it.valid(); ++it) {
+      auto p      = *it;
+      auto& value = in_acc[p];
+      for (auto bin_idx = 0; bin_idx < num_bins; ++bin_idx) {
+        if (bin_acc[bin_idx] <= value && value < bin_acc[bin_idx + 1]) {
+          res_acc[p] = static_cast<uint64_t>(bin_idx);
+          break;
+        }
+      }
+    }
+  }
+
+  template <legate::LegateTypeCode CODE,
+            std::enable_if_t<!legate::is_integral<CODE>::value>* = nullptr>
+  void operator()(legate::Store& result, legate::Store& input, legate::Store& bins)
+  {
+    assert(false);
+  }
+};
+
+}  // namespace
+
+class CategorizeTask : public Task<CategorizeTask, CATEGORIZE> {
+ public:
+  static void cpu_variant(legate::TaskContext& context)
+  {
+    auto& input  = context.inputs()[0];
+    auto& bins   = context.inputs()[1];
+    auto& result = context.outputs()[0];
+
+    legate::type_dispatch(input.code(), categorize_fn{}, result, input, bins);
+  }
+};
+
+}  // namespace legate_raft
+
+namespace {
+
+static void __attribute__((constructor)) register_tasks()
+{
+  legate_raft::CategorizeTask::register_variants();
+}
+
+}  // namespace
diff --git a/cpp/src/task/compute_1nn.cc b/cpp/src/task/compute_1nn.cc
index 7439af105f..2a0c630c5f 100644
--- a/cpp/src/task/compute_1nn.cc
+++ b/cpp/src/task/compute_1nn.cc
@@ -23,4 +23,4 @@ namespace  // unnamed
   legate_raft::Compute1NNTask::register_variants();
 }
 
-}  // namespace
\ No newline at end of file
+}  // namespace
diff --git a/cpp/src/task/convert.cc b/cpp/src/task/convert.cc
new file mode 100644
index 0000000000..56b76930f1
--- /dev/null
+++ b/cpp/src/task/convert.cc
@@ -0,0 +1,93 @@
+/* Copyright 2023 NVIDIA Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include <cstdint>
+
+#include "legate_library.h"
+#include "legate_raft_cffi.h"
+
+#include "core/utilities/dispatch.h"
+#include "core/utilities/typedefs.h"
+
+namespace legate_raft {
+
+namespace {
+
+template <legate::LegateTypeCode SRC_CODE, legate::LegateTypeCode DST_CODE>
+struct convert_fn {
+  template <int32_t DIM>
+  void operator()(legate::Store& output, legate::Store& input)
+  {
+    using SRC = legate::legate_type_of<SRC_CODE>;
+    using DST = legate::legate_type_of<DST_CODE>;
+
+    auto shape = input.shape<DIM>();
+
+    if (shape.empty()) return;
+
+    auto input_acc  = input.read_accessor<SRC, DIM>();
+    auto output_acc = output.write_accessor<DST, DIM>();
+
+    for (legate::PointInRectIterator<DIM> it(shape, false /*fortran order*/); it.valid(); ++it) {
+      auto p        = *it;
+      output_acc[p] = static_cast<DST>(input_acc[p]);
+    }
+  }
+};
+
+}  // namespace
+
+class ConvertTask : public Task<ConvertTask, CONVERT> {
+ public:
+  static void cpu_variant(legate::TaskContext& context)
+  {
+    auto& input  = context.inputs()[0];
+    auto& output = context.outputs()[0];
+
+    switch (input.code()) {
+      case legate::LegateTypeCode::INT64_LT:
+        switch (output.code()) {
+          case legate::LegateTypeCode::FLOAT_LT:
+            return legate::dim_dispatch(
+              input.dim(),
+              convert_fn<legate::LegateTypeCode::INT64_LT, legate::LegateTypeCode::FLOAT_LT>{},
+              output,
+              input);
+          case legate::LegateTypeCode::DOUBLE_LT:
+            return legate::dim_dispatch(
+              input.dim(),
+              convert_fn<legate::LegateTypeCode::INT64_LT, legate::LegateTypeCode::DOUBLE_LT>{},
+              output,
+              input);
+          default: throw(output.code());  // output type not supported
+        }
+      default: throw(input.code());  // input type not supported
+    }
+  }
+};
+
+}  // namespace legate_raft
+
+namespace {
+
+static void __attribute__((constructor)) register_tasks()
+{
+  legate_raft::ConvertTask::register_variants();
+}
+
+}  // namespace
diff --git a/cpp/src/task/exp.cc b/cpp/src/task/exp.cc
new file mode 100644
index 0000000000..f952845bf7
--- /dev/null
+++ b/cpp/src/task/exp.cc
@@ -0,0 +1,70 @@
+/* Copyright 2023 NVIDIA Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include <cmath>
+
+#include "legate_library.h"
+#include "legate_raft_cffi.h"
+
+#include "core/utilities/dispatch.h"
+
+namespace legate_raft {
+
+namespace {
+
+struct exp_fn {
+  template <legate::LegateTypeCode CODE, int32_t DIM>
+  void operator()(legate::Store& output, legate::Store& input)
+  {
+    using VAL = legate::legate_type_of<CODE>;
+
+    auto shape = input.shape<DIM>();
+
+    if (shape.empty()) return;
+
+    auto input_acc  = input.read_accessor<VAL, DIM>();
+    auto output_acc = output.write_accessor<VAL, DIM>();
+
+    for (legate::PointInRectIterator<DIM> it(shape, false /*fortran order*/); it.valid(); ++it) {
+      auto p        = *it;
+      output_acc[p] = exp(input_acc[p]);
+    }
+  }
+};
+
+}  // namespace
+
+class ExpTask : public Task<ExpTask, EXP> {
+ public:
+  static void cpu_variant(legate::TaskContext& context)
+  {
+    auto& input  = context.inputs()[0];
+    auto& output = context.outputs()[0];
+
+    legate::double_dispatch(input.dim(), input.code(), exp_fn{}, output, input);
+  }
+};
+
+}  // namespace legate_raft
+
+namespace {
+
+static void __attribute__((constructor)) register_tasks()
+{
+  legate_raft::ExpTask::register_variants();
+}
+
+}  // namespace
diff --git a/cpp/src/task/fill.cc b/cpp/src/task/fill.cc
new file mode 100644
index 0000000000..007b1af30d
--- /dev/null
+++ b/cpp/src/task/fill.cc
@@ -0,0 +1,67 @@
+/* Copyright 2023 NVIDIA Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include "legate_library.h"
+#include "legate_raft_cffi.h"
+
+#include "core/utilities/dispatch.h"
+
+namespace legate_raft {
+
+namespace {
+
+struct fill_fn {
+  template <legate::LegateTypeCode CODE, int32_t DIM>
+  void operator()(legate::Store& output, legate::Scalar& value)
+  {
+    using VAL = legate::legate_type_of<CODE>;
+
+    auto shape = output.shape<DIM>();
+
+    if (shape.empty()) return;
+
+    auto output_acc = output.write_accessor<VAL, DIM>();
+
+    for (legate::PointInRectIterator<DIM> it(shape, false /*fortran order*/); it.valid(); ++it) {
+      auto p        = *it;
+      output_acc[p] = value.value<VAL>();
+    }
+  }
+};
+
+}  // namespace
+
+class FillTask : public Task<FillTask, FILL> {
+ public:
+  static void cpu_variant(legate::TaskContext& context)
+  {
+    auto& value  = context.scalars()[0];
+    auto& output = context.outputs()[0];
+
+    legate::double_dispatch(output.dim(), output.code(), fill_fn{}, output, value);
+  }
+};
+
+}  // namespace legate_raft
+
+namespace {
+
+static void __attribute__((constructor)) register_tasks()
+{
+  legate_raft::FillTask::register_variants();
+}
+
+}  // namespace
diff --git a/cpp/src/task/find_max.cc b/cpp/src/task/find_max.cc
new file mode 100644
index 0000000000..b17cc95485
--- /dev/null
+++ b/cpp/src/task/find_max.cc
@@ -0,0 +1,96 @@
+/* Copyright 2023 NVIDIA Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include "legate_library.h"
+#include "legate_raft_cffi.h"
+
+#include "core/utilities/dispatch.h"
+#include "core/utilities/typedefs.h"
+
+namespace legate_raft {
+
+namespace {
+
+struct find_max_fn {
+  template <legate::LegateTypeCode CODE, int32_t DIM>
+  void operator()(legate::Store& output, legate::Store& input)
+  {
+    using VAL = legate::legate_type_of<CODE>;
+
+    auto shape = input.shape<DIM>();
+
+    if (shape.empty()) return;
+
+    auto in_acc  = input.read_accessor<VAL, DIM>();
+    auto red_acc = output.reduce_accessor<legate::MaxReduction<VAL>, true, DIM>();
+
+    for (legate::PointInRectIterator<DIM> it(shape, false /*fortran_order*/); it.valid(); ++it) {
+      auto p = *it;
+      // Coordinates of the contracting dimension are ignored by red_acc via an affine
+      // transformation. For example, if the store was 3D and the second dimension was contracted,
+      // each point p will go through the following affine transformation to recover the point in
+      // the domain prior to the promotion:
+      //
+      //     | 1 0 0 |     | x |
+      //     |       |  *  | y |
+      //     | 0 0 1 |     | z |
+      //
+      // where the "*" operator denotes a matrix-vector multiplication.
+      red_acc.reduce(p, in_acc[p]);
+    }
+  }
+};
+
+template <legate::LegateTypeCode CODE>
+struct find_max_fn_outer {
+  template <int32_t DIM>
+  void operator()(legate::Store& output, legate::Store& input)
+  {
+    find_max_fn{}.operator()<CODE, DIM>(output, input);
+  }
+};
+
+}  // namespace
+
+class FindMaxTask : public Task<FindMaxTask, FIND_MAX> {
+ public:
+  static void cpu_variant(legate::TaskContext& context)
+  {
+    auto& input  = context.inputs()[0];
+    auto& output = context.reductions()[0];
+
+    switch (input.code()) {
+      case legate::LegateTypeCode::DOUBLE_LT:
+        return legate::dim_dispatch(
+          input.dim(), find_max_fn_outer<legate::LegateTypeCode::DOUBLE_LT>{}, output, input);
+      default: throw("Input type not supported.");
+    }
+  }
+};
+
+}  // namespace legate_raft
+
+namespace {
+
+static void __attribute__((constructor)) register_tasks()
+{
+  legate_raft::FindMaxTask::register_variants();
+}
+
+}  // namespace
diff --git a/cpp/src/task/histogram.cc b/cpp/src/task/histogram.cc
index 742b632be2..8af2d5f99e 100644
--- a/cpp/src/task/histogram.cc
+++ b/cpp/src/task/histogram.cc
@@ -1,27 +1,87 @@
-#include "../raft/raft_api.hpp"
+/* Copyright 2023 NVIDIA Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
 
-#include "../legate_raft.h"
-#include "../legate_library.h"
+#include "legate_library.h"
+#include "legate_raft_cffi.h"
+
+#include "core/utilities/dispatch.h"
+#include "core/utilities/type_traits.h"
 
 namespace legate_raft {
 
-  // FUSED_1NN comes from
-  class HistogramTask : public Task<HistogramTask, HISTOGRAM> {
-  public:
-    static void gpu_variant(legate::TaskContext& context)
-    {
-      test_histogram();
-    }
-  };
+namespace {
+
+struct histogram_fn {
+  template <legate::LegateTypeCode CODE,
+            std::enable_if_t<legate::is_integral<CODE>::value>* = nullptr>
+  void operator()(legate::Store& result, legate::Store& input, legate::Store& bins)
+  {
+    using VAL = legate::legate_type_of<CODE>;
+
+    auto in_shape  = input.shape<1>();
+    auto bin_shape = bins.shape<1>();
+
+    assert(!bin_shape.empty());
+    if (in_shape.empty()) return;
+
+    auto num_bins = bin_shape.hi[0] - bin_shape.lo[0];
+
+    auto in_acc  = input.read_accessor<VAL, 1>();
+    auto bin_acc = bins.read_accessor<VAL, 1>();
+    auto res_acc = result.reduce_accessor<legate::SumReduction<uint64_t>, true, 1>();
+
+    for (legate::PointInRectIterator<1> it(in_shape); it.valid(); ++it) {
+      auto& value = in_acc[*it];
+      for (auto bin_idx = 0; bin_idx < num_bins; ++bin_idx)
+        if (bin_acc[bin_idx] <= value && value < bin_acc[bin_idx + 1]) {
+          res_acc.reduce(bin_idx, 1);
+          break;
+        }
+    }
+  }
+
+  template <legate::LegateTypeCode CODE,
+            std::enable_if_t<!legate::is_integral<CODE>::value>* = nullptr>
+  void operator()(legate::Store& result, legate::Store& input, legate::Store& bins)
+  {
+    assert(false);
+  }
+};
+
+}  // namespace
+
+class HistogramTask : public Task<HistogramTask, HISTOGRAM> {
+ public:
+  static void cpu_variant(legate::TaskContext& context)
+  {
+    auto& input  = context.inputs()[0];
+    auto& bins   = context.inputs()[1];
+    auto& result = context.reductions()[0];
+
+    legate::type_dispatch(input.code(), histogram_fn{}, result, input, bins);
+  }
+};
 
 }  // namespace legate_raft
 
-namespace // unnamed
-{
+namespace {
 
-  static void __attribute__((constructor)) register_tasks(void)
-  {
-    legate_raft::HistogramTask::register_variants();
-  }
+static void __attribute__((constructor)) register_tasks()
+{
+  legate_raft::HistogramTask::register_variants();
+}
 
-} // namespace
\ No newline at end of file
+}  // namespace
diff --git a/cpp/src/task/log.cc b/cpp/src/task/log.cc
new file mode 100644
index 0000000000..22bff125a6
--- /dev/null
+++ b/cpp/src/task/log.cc
@@ -0,0 +1,70 @@
+/* Copyright 2023 NVIDIA Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include <cmath>
+
+#include "legate_library.h"
+#include "legate_raft_cffi.h"
+
+#include "core/utilities/dispatch.h"
+
+namespace legate_raft {
+
+namespace {
+
+struct log_fn {
+  template <legate::LegateTypeCode CODE, int32_t DIM>
+  void operator()(legate::Store& output, legate::Store& input)
+  {
+    using VAL = legate::legate_type_of<CODE>;
+
+    auto shape = input.shape<DIM>();
+
+    if (shape.empty()) return;
+
+    auto input_acc  = input.read_accessor<VAL, DIM>();
+    auto output_acc = output.write_accessor<VAL, DIM>();
+
+    for (legate::PointInRectIterator<DIM> it(shape, false /*fortran order*/); it.valid(); ++it) {
+      auto p        = *it;
+      output_acc[p] = log(input_acc[p]);
+    }
+  }
+};
+
+}  // namespace
+
+class LogTask : public Task<LogTask, LOG> {
+ public:
+  static void cpu_variant(legate::TaskContext& context)
+  {
+    auto& input  = context.inputs()[0];
+    auto& output = context.outputs()[0];
+
+    legate::double_dispatch(input.dim(), input.code(), log_fn{}, output, input);
+  }
+};
+
+}  // namespace legate_raft
+
+namespace {
+
+static void __attribute__((constructor)) register_tasks()
+{
+  legate_raft::LogTask::register_variants();
+}
+
+}  // namespace
diff --git a/cpp/src/task/matmul.cc b/cpp/src/task/matmul.cc
new file mode 100644
index 0000000000..eb9c8bdc1e
--- /dev/null
+++ b/cpp/src/task/matmul.cc
@@ -0,0 +1,70 @@
+/* Copyright 2023 NVIDIA Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include "legate_library.h"
+#include "legate_raft_cffi.h"
+
+#include "core/utilities/dispatch.h"
+
+namespace legate_raft {
+
+namespace {
+
+struct matmul_fn {
+  template <legate::LegateTypeCode CODE>
+  void operator()(legate::Store& lhs, legate::Store& rhs1, legate::Store& rhs2)
+  {
+    using VAL = legate::legate_type_of<CODE>;
+
+    auto shape = rhs1.shape<3>().intersection(rhs2.shape<3>());
+
+    if (shape.empty()) return;
+
+    auto rhs1_acc = rhs1.read_accessor<VAL, 3>();
+    auto rhs2_acc = rhs2.read_accessor<VAL, 3>();
+    auto lhs_acc  = lhs.reduce_accessor<legate::SumReduction<VAL>, true, 3>();
+
+    for (legate::PointInRectIterator<3> it(shape, false /*fortran_order*/); it.valid(); ++it) {
+      auto p = *it;
+      lhs_acc.reduce(p, rhs1_acc[p] * rhs2_acc[p]);
+    }
+  }
+};
+
+}  // namespace
+
+class MatMulTask : public Task<MatMulTask, MATMUL> {
+ public:
+  static void cpu_variant(legate::TaskContext& context)
+  {
+    auto& rhs1 = context.inputs()[0];
+    auto& rhs2 = context.inputs()[1];
+    auto& lhs  = context.reductions()[0];
+
+    legate::type_dispatch(lhs.code(), matmul_fn{}, lhs, rhs1, rhs2);
+  }
+};
+
+}  // namespace legate_raft
+
+namespace {
+
+static void __attribute__((constructor)) register_tasks()
+{
+  legate_raft::MatMulTask::register_variants();
+}
+
+}  // namespace
diff --git a/cpp/src/task/mul.cc b/cpp/src/task/mul.cc
new file mode 100644
index 0000000000..e71715c7e5
--- /dev/null
+++ b/cpp/src/task/mul.cc
@@ -0,0 +1,70 @@
+/* Copyright 2023 NVIDIA Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include "legate_library.h"
+#include "legate_raft_cffi.h"
+
+#include "core/utilities/dispatch.h"
+
+namespace legate_raft {
+
+namespace {
+
+struct mul_fn {
+  template <legate::LegateTypeCode CODE, int32_t DIM>
+  void operator()(legate::Store& lhs, legate::Store& rhs1, legate::Store& rhs2)
+  {
+    using VAL = legate::legate_type_of<CODE>;
+
+    auto shape = lhs.shape<DIM>();
+
+    if (shape.empty()) return;
+
+    auto rhs1_acc = rhs1.read_accessor<VAL, DIM>();
+    auto rhs2_acc = rhs2.read_accessor<VAL, DIM>();
+    auto lhs_acc  = lhs.write_accessor<VAL, DIM>();
+
+    for (legate::PointInRectIterator<DIM> it(shape, false /*fortran_order*/); it.valid(); ++it) {
+      auto p     = *it;
+      lhs_acc[p] = rhs1_acc[p] * rhs2_acc[p];
+    }
+  }
+};
+
+}  // namespace
+
+class MultiplyTask : public Task<MultiplyTask, MUL> {
+ public:
+  static void cpu_variant(legate::TaskContext& context)
+  {
+    auto& rhs1 = context.inputs()[0];
+    auto& rhs2 = context.inputs()[1];
+    auto& lhs  = context.outputs()[0];
+
+    legate::double_dispatch(lhs.dim(), lhs.code(), mul_fn{}, lhs, rhs1, rhs2);
+  }
+};
+
+}  // namespace legate_raft
+
+namespace {
+
+static void __attribute__((constructor)) register_tasks()
+{
+  legate_raft::MultiplyTask::register_variants();
+}
+
+}  // namespace
diff --git a/cpp/src/task/sum_over_axis.cc b/cpp/src/task/sum_over_axis.cc
new file mode 100644
index 0000000000..e914c2c7c2
--- /dev/null
+++ b/cpp/src/task/sum_over_axis.cc
@@ -0,0 +1,78 @@
+/* Copyright 2023 NVIDIA Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include "legate_library.h"
+#include "legate_raft_cffi.h"
+
+#include "core/utilities/dispatch.h"
+
+namespace legate_raft {
+
+namespace {
+
+struct reduction_fn {
+  template <legate::LegateTypeCode CODE, int32_t DIM>
+  void operator()(legate::Store& output, legate::Store& input)
+  {
+    using VAL = legate::legate_type_of<CODE>;
+
+    auto shape = input.shape<DIM>();
+
+    if (shape.empty()) return;
+
+    auto in_acc  = input.read_accessor<VAL, DIM>();
+    auto red_acc = output.reduce_accessor<legate::SumReduction<VAL>, true, DIM>();
+
+    for (legate::PointInRectIterator<DIM> it(shape, false /*fortran_order*/); it.valid(); ++it) {
+      auto p = *it;
+      // Coordinates of the contracting dimension are ignored by red_acc via an affine
+      // transformation. For example, if the store was 3D and the second dimension was contracted,
+      // each point p will go through the following affine transformation to recover the point in
+      // the domain prior to the promotion:
+      //
+      //     | 1 0 0 |     | x |
+      //     |       |  *  | y |
+      //     | 0 0 1 |     | z |
+      //
+      // where the "*" operator denotes a matrix-vector multiplication.
+      red_acc.reduce(p, in_acc[p]);
+    }
+  }
+};
+
+}  // namespace
+
+class SumOverAxisTask : public Task<SumOverAxisTask, SUM_OVER_AXIS> {
+ public:
+  static void cpu_variant(legate::TaskContext& context)
+  {
+    auto& input  = context.inputs()[0];
+    auto& output = context.reductions()[0];
+
+    legate::double_dispatch(input.dim(), input.code(), reduction_fn{}, output, input);
+  }
+};
+
+}  // namespace legate_raft
+
+namespace {
+
+static void __attribute__((constructor)) register_tasks()
+{
+  legate_raft::SumOverAxisTask::register_variants();
+}
+
+}  // namespace
diff --git a/legate/CMakeLists.txt b/legate/CMakeLists.txt
new file mode 100644
index 0000000000..54214023b9
--- /dev/null
+++ b/legate/CMakeLists.txt
@@ -0,0 +1,74 @@
+# =============================================================================
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+
+
+function(legate_python_library_template py_target cpp_target output_dir)
+set(file_template
+[=[
+from legate.core import (
+    Library,
+    ResourceConfig,
+    get_legate_runtime,
+)
+import os
+from typing import Any
+
+class UserLibrary(Library):
+    def __init__(self, name: str) -> None:
+        self.name = name
+        self.shared_object: Any = None
+
+    @property
+    def cffi(self) -> Any:
+        return self.shared_object
+
+    def get_name(self) -> str:
+        return self.name
+
+    def get_shared_library(self) -> str:
+        from @py_target@.install_info import libpath
+        return os.path.join(libpath, f"lib@cpp_target@{self.get_library_extension()}")
+
+    def get_c_header(self) -> str:
+        from @py_target@.install_info import header
+
+        return header
+
+    def get_registration_callback(self) -> str:
+        return "@cpp_target@_perform_registration"
+
+    def get_resource_configuration(self) -> ResourceConfig:
+        assert self.shared_object is not None
+        config = ResourceConfig()
+        config.max_tasks = 1024
+        config.max_mappers = 1
+        config.max_reduction_ops = 8
+        config.max_projections = 0
+        config.max_shardings = 0
+        return config
+
+    def initialize(self, shared_object: Any) -> None:
+        self.shared_object = shared_object
+
+    def destroy(self) -> None:
+        pass
+
+user_lib = UserLibrary("@cpp_target@")
+user_context = get_legate_runtime().register_library(user_lib)
+]=])
+string(CONFIGURE "${file_template}" file_content @ONLY)
+file(WRITE ${CMAKE_SOURCE_DIR}/${output_dir}/library.py "${file_content}")
+endfunction()
+
+
+legate_python_library_template(legate.raft legate_raft legate/raft)
diff --git a/legate/__init__.py b/legate/__init__.py
new file mode 100644
index 0000000000..551c81b25d
--- /dev/null
+++ b/legate/__init__.py
@@ -0,0 +1,18 @@
+# Copyright 2021 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from pkgutil import extend_path
+
+__path__ = extend_path(__path__, __name__)
diff --git a/legate/raft/__init__.py b/legate/raft/__init__.py
new file mode 100644
index 0000000000..f88da98984
--- /dev/null
+++ b/legate/raft/__init__.py
@@ -0,0 +1,33 @@
+from .core import (
+    add,
+    array_to_store,
+    bincount,
+    categorize,
+    convert,
+    exp,
+    fill,
+    log,
+    matmul,
+    multiply,
+    negative,
+    store_to_array,
+    subtract,
+    sum_over_axis,
+)
+
+__all__ = [
+    "add",
+    "array_to_store",
+    "bincount",
+    "categorize",
+    "convert",
+    "exp",
+    "fill",
+    "log",
+    "matmul",
+    "multiply",
+    "negative",
+    "store_to_array",
+    "subtract",
+    "sum_over_axis",
+]
diff --git a/legate/raft/cffi.py b/legate/raft/cffi.py
new file mode 100644
index 0000000000..fff229f2d2
--- /dev/null
+++ b/legate/raft/cffi.py
@@ -0,0 +1,19 @@
+from enum import IntEnum
+
+from .library import user_lib
+
+
+class OpCode(IntEnum):
+    ADD = user_lib.cffi.ADD
+    ADD_CONSTANT = user_lib.cffi.ADD_CONSTANT
+    BINCOUNT = user_lib.cffi.BINCOUNT
+    CATEGORIZE = user_lib.cffi.CATEGORIZE
+    CONVERT = user_lib.cffi.CONVERT
+    EXP = user_lib.cffi.EXP
+    FILL = user_lib.cffi.FILL
+    FIND_MAX = user_lib.cffi.FIND_MAX
+    HISTOGRAM = user_lib.cffi.HISTOGRAM
+    LOG = user_lib.cffi.LOG
+    MATMUL = user_lib.cffi.MATMUL
+    MUL = user_lib.cffi.MUL
+    SUM_OVER_AXIS = user_lib.cffi.SUM_OVER_AXIS
diff --git a/legate/raft/core.py b/legate/raft/core.py
new file mode 100644
index 0000000000..2de053db6b
--- /dev/null
+++ b/legate/raft/core.py
@@ -0,0 +1,446 @@
+# Copyright 2023 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from dataclasses import dataclass
+from numbers import Number
+
+import numpy as np
+import pyarrow as pa
+
+import legate.core.types as ty
+from legate.core import Store
+from legate.core._legion.future import Future
+
+from .cffi import OpCode
+from .library import user_context as context
+from .util import promote
+
+
+@dataclass
+class _NDArray:
+    shape: tuple[int]
+    typestr: str
+    ptr: int
+    strides: tuple[int] | None
+    read_only: bool
+    version: int = 3
+
+    @property
+    def __array_interface__(self):
+        return {
+            "version": self.version,
+            "shape": self.shape,
+            "typestr": self.typestr,
+            "data": (self.ptr, self.read_only),
+            "strides": self.strides,
+        }
+
+
+def array_to_store(array: np.ndarray) -> Store:
+    store = context.create_store(
+        pa.from_numpy_dtype(array.dtype),
+        shape=array.shape,
+        optimize_scalar=False,
+    )
+    store.attach_external_allocation(
+        context,
+        array.data,
+        share=False,
+    )
+    return store
+
+
+def store_to_array(store: Store) -> np.ndarray:
+    if store.kind is Future:
+        dtype = store.get_dtype()
+        buf = store.storage.get_buffer(dtype.size)
+        result = np.frombuffer(buf, dtype=dtype.type.to_pandas_dtype(), count=1)
+        return result
+    else:
+        assert store.shape != ()
+
+        alloc = store.get_inline_allocation(context)
+
+        def construct_ndarray(shape, address, strides):
+            dtype = np.dtype(store.get_dtype().type.to_pandas_dtype())
+
+            initializer = _NDArray(shape, dtype.str, address, strides, False)
+            result = np.asarray(initializer)
+            return result
+
+        result = alloc.consume(construct_ndarray)
+        return result
+
+
+def convert(input: Store, dtype: pa.DataType) -> Store:
+    dtype = context.type_system[dtype]
+    result = context.create_store(dtype, input.shape)
+    task = context.create_auto_task(OpCode.CONVERT)
+    task.add_input(input)
+    task.add_output(result)
+    task.add_alignment(input, result)
+    task.execute()
+
+    return result
+
+
+def _sanitize_axis(axis: int, ndim: int) -> int:
+    sanitized = axis
+    if sanitized < 0:
+        sanitized += ndim
+    if sanitized < 0 or sanitized >= ndim:
+        raise ValueError(f"Invalid axis {axis} for a {ndim}-D store")
+    return sanitized
+
+
+def sum_over_axis(input: Store, axis: int) -> Store:
+    """
+    Sum values along the chosen axis
+
+    Parameters
+    ----------
+    input : Store
+        Input to sum
+    axis : int
+        Axis along which the summation should be done
+
+    Returns
+    -------
+    Store
+        Summation result
+    """
+    sanitized = _sanitize_axis(axis, input.ndim)
+
+    # Compute the output shape by removing the axis being summed over
+    res_shape = tuple(ext for dim, ext in enumerate(input.shape) if dim != sanitized)
+    result = fill(res_shape, 0, dtype=input.type)
+
+    # Broadcast the output along the contracting dimension
+    promoted = result.promote(axis, input.shape[axis])
+
+    assert promoted.shape == input.shape
+
+    task = context.create_auto_task(OpCode.SUM_OVER_AXIS)
+    task.add_input(input)
+    task.add_reduction(promoted, ty.ReductionOp.ADD)
+    task.add_alignment(input, promoted)
+
+    task.execute()
+
+    return result
+
+
+def multiply(rhs1: Store, rhs2: Store) -> Store:
+    if rhs1.type != rhs2.type or rhs1.shape != rhs2.shape:
+        raise ValueError("Stores to multiply must have the same type and shape")
+
+    result = context.create_store(rhs1.type.type, rhs1.shape)
+
+    task = context.create_auto_task(OpCode.MUL)
+    task.add_input(rhs1)
+    task.add_input(rhs2)
+    task.add_output(result)
+    task.add_alignment(result, rhs1)
+    task.add_alignment(result, rhs2)
+
+    task.execute()
+
+    return result
+
+
+def matmul(rhs1: Store, rhs2: Store) -> Store:
+    """
+    Performs matrix multiplication
+
+    Parameters
+    ----------
+    rhs1, rhs2 : Store
+        Matrices to multiply
+
+    Returns
+    -------
+    Store
+        Multiplication result
+    """
+    if rhs1.ndim != 2 or rhs2.ndim != 2:
+        raise ValueError("Stores must be 2D")
+    if rhs1.type != rhs2.type:
+        raise ValueError("Stores must have the same type")
+    if rhs1.shape[1] != rhs2.shape[0]:
+        raise ValueError(
+            "Can't do matrix multiplication between arrays of "
+            f"shapes {rhs1.shape} and {rhs2.shape}"
+        )
+
+    m = rhs1.shape[0]
+    k = rhs1.shape[1]
+    n = rhs2.shape[1]
+
+    # Multiplying an (m, k) matrix with a (k, n) matrix gives
+    # an (m, n) matrix
+    result = fill((m, n), 0, dtype=rhs1.type)
+
+    # Each store gets a fake dimension that it doesn't have
+    rhs1 = rhs1.promote(2, n)
+    rhs2 = rhs2.promote(0, m)
+    lhs = result.promote(1, k)
+
+    assert lhs.shape == rhs1.shape
+    assert lhs.shape == rhs2.shape
+
+    task = context.create_auto_task(OpCode.MATMUL)
+    task.add_input(rhs1)
+    task.add_input(rhs2)
+    task.add_reduction(lhs, ty.ReductionOp.ADD)
+    task.add_alignment(lhs, rhs1)
+    task.add_alignment(lhs, rhs2)
+
+    task.execute()
+
+    return result
+
+
+def bincount(input: Store, num_bins: int) -> Store:
+    """
+    Counts the occurrences of each bin index
+
+    Parameters
+    ----------
+    input : Store
+        Input to bin-count
+    num_bins : int
+        Number of bins
+
+    Returns
+    -------
+    Store
+        Counting result
+    """
+    result = fill((num_bins,), 0, ty.uint64)
+
+    task = context.create_auto_task(OpCode.BINCOUNT)
+    task.add_input(input)
+    # Broadcast the result store. This commands the Legate runtime to give
+    # the entire store to every task instantiated by this task descriptor
+    task.add_broadcast(result)
+    # Declares that the tasks will do reductions to the result store and
+    # that outputs from the tasks should be combined by addition
+    task.add_reduction(result, ty.ReductionOp.ADD)
+
+    task.execute()
+
+    return result
+
+
+def categorize(input: Store, bins: Store) -> Store:
+    result = context.create_store(ty.uint64, input.shape)
+
+    task = context.create_auto_task(OpCode.CATEGORIZE)
+    task.add_input(input)
+    task.add_input(bins)
+    task.add_output(result)
+
+    # Broadcast the store that contains bin edges. Each task will get a copy
+    # of the entire bin edges
+    task.add_broadcast(bins)
+
+    task.execute()
+
+    return result
+
+
+def histogram(input: Store, bins: Store) -> Store:
+    """
+    Constructs a histogram for the given bins
+
+    Parameters
+    ----------
+    input : Store
+        Input
+    bins : Store
+        Bin edges
+
+    Returns
+    -------
+    Store
+        Histogram
+    """
+    num_bins = bins.shape[0] - 1
+    result = fill((num_bins,), 0, ty.uint64)
+
+    task = context.create_auto_task(OpCode.HISTOGRAM)
+    task.add_input(input)
+    task.add_input(bins)
+    task.add_reduction(result, ty.ReductionOp.ADD)
+
+    # Broadcast both the result store and the one that contains bin edges.
+    task.add_broadcast(bins)
+    task.add_broadcast(result)
+
+    task.execute()
+
+    return result
+
+
+def _add_constant(input: Store, value: Number) -> Store:
+    result = context.create_store(input.type, input.shape)
+
+    task = context.create_auto_task(OpCode.ADD_CONSTANT)
+    task.add_input(input)
+    task.add_scalar_arg(value, input.type)
+    task.add_output(result)
+    task.add_alignment(input, result)
+
+    task.execute()
+
+    return result
+
+
+def log(input: Store) -> Store:
+    result = context.create_store(input.type, input.shape)
+
+    task = context.create_auto_task(OpCode.LOG)
+    task.add_input(input)
+    task.add_output(result)
+    task.add_alignment(input, result)
+
+    task.execute()
+
+    return result
+
+
+def exp(input: Store) -> Store:
+    result = context.create_store(input.type, input.shape)
+
+    task = context.create_auto_task(OpCode.EXP)
+    task.add_input(input)
+    task.add_output(result)
+    task.add_alignment(input, result)
+
+    task.execute()
+
+    return result
+
+
+def fill(shape, fill_value, dtype=None) -> Store:
+    if dtype is None:
+        try:
+            dtype = pa.from_numpy_dtype(fill_value.dtype)
+        except AttributeError:
+            fill_value = np.asanyarray(fill_value)
+            dtype = pa.from_numpy_dtype(fill_value.dtype)
+
+    result = context.create_store(dtype, shape)
+    assert result.type == dtype
+
+    task = context.create_auto_task(OpCode.FILL)
+    task.add_output(result)
+    task.add_scalar_arg(fill_value, result.type)
+    task.execute()
+
+    return result
+
+
+def add(x1: Store | Number, x2: Store | Number) -> Store | Number:
+    if isinstance(x1, Number):
+        if isinstance(x2, Number):
+            return x1 + x2  # native function
+        else:
+            return add(x2, x1)  # swap operands
+
+    elif isinstance(x2, Number):
+        return _add_constant(x1, x2)
+    elif x1.shape == x2.shape:
+        return _add_stores(x1, x2)
+    else:
+        return _add_broadcast(x1, x2)
+
+
+def subtract(x1: Store | Number, x2: Store | Number) -> Store | Number:
+    if isinstance(x1, Number) and isinstance(x2, Number):
+        return x1 - x2  # native function
+    else:
+        return add(x1, negative(x2))
+
+
+def _add_stores(x1: Store, x2: Store) -> Store:
+    result = context.create_store(x1.type, x1.shape)
+
+    task = context.create_auto_task(OpCode.ADD)
+    task.add_input(x1)
+    task.add_input(x2)
+    task.add_output(result)
+    task.add_alignment(x1, x2)
+    task.add_alignment(x1, result)
+
+    task.execute()
+
+    return result
+
+
+def negative(lhs: Store) -> Store:
+    minus_one = fill((lhs.shape), lhs.type.type.to_pandas_dtype()(-1))
+    result = context.create_store(lhs.type, lhs.shape)
+
+    task = context.create_auto_task(OpCode.MUL)
+    task.add_input(lhs)
+    task.add_input(minus_one)
+    task.add_alignment(lhs, minus_one)
+    task.add_alignment(lhs, result)
+    task.add_output(result)
+    task.execute()
+
+    return result
+
+
+def _add_broadcast(x1: Store, x2: Store) -> Store:
+    def func(dim, dim_size):
+        nonlocal x2
+        x2 = x2.promote(dim, dim_size)
+
+    promote(x2.shape, x1.shape, func)
+    assert x1.shape == x2.shape
+
+    result = context.create_store(x1.type, x1.shape)
+    task = context.create_auto_task(OpCode.ADD)
+    task.add_input(x1)
+    task.add_input(x2)
+    task.add_alignment(x1, x2)
+    task.add_output(result)
+    task.add_alignment(x1, result)
+
+    task.execute()
+
+    return result
+
+
+def max(x: Store, axis: int) -> Store:
+    sanitized = _sanitize_axis(axis, x.ndim)
+
+    limit_min = np.finfo(x.type.type.to_pandas_dtype()).min
+    res_shape = tuple(ext for dim, ext in enumerate(x.shape) if dim != sanitized)
+    result = fill(res_shape, limit_min, x.type)
+
+    promoted = result.promote(axis, x.shape[axis])
diff --git a/legate/raft/special.py b/legate/raft/special.py
new file mode 100644
index 0000000000..3ea24eba8d
--- /dev/null
+++ b/legate/raft/special.py
@@ -0,0 +1,34 @@
+# Copyright 2023 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from legate.core import Store
+
+from .core import add, exp, log
+from .core import max as lg_max
+from .core import subtract, sum_over_axis
+
+
+def logsumexp(x: Store, axis: int) -> Store:
+    # The implementation below expresses the numerically stable
+    # log-sum-exp trick via the numpy API:
+    #   c = x.max(axis=axis)
+    #   c + np.log(np.sum(np.exp(x - c), axis=axis))
+    # Subtracting the maximum first keeps exp() from overflowing.
+    # Note: the hard-coded transposes below assume a 2-D store.
+    x_max = lg_max(x, axis=axis)
+    tmp0 = subtract(x.transpose((1, 0)), x_max).transpose((1, 0))
+    tmp = exp(tmp0)
+    s = sum_over_axis(tmp, axis=axis)
+    out = log(s)
+    ret = add(out, x_max)
+    return ret
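+
+# Why the max-shift matters (a numeric sketch of the numpy equivalent):
+# for x = [[1000.0, 1000.0]], np.log(np.sum(np.exp(x))) overflows, while
+# logsumexp(x, axis=1) computes 1000.0 + log(2) ~= 1000.693.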
diff --git a/legate/raft/tests.py b/legate/raft/tests.py
new file mode 100644
index 0000000000..5e66b8ace1
--- /dev/null
+++ b/legate/raft/tests.py
@@ -0,0 +1,56 @@
+import numpy as np
+import pytest
+from hypothesis import assume, example, given, note, settings
+from hypothesis import strategies as st
+from hypothesis.extra.numpy import array_shapes
+
+from .util import broadcast_shape, promote
+
+
+@given(a=array_shapes(), b=array_shapes())
+@settings(max_examples=1000)
+def test_broadcast(a, b):
+    try:
+        broadcasted_shape = broadcast_shape(a, b)
+    except ValueError:
+        note(f"incompatible {a} {b}")
+        with pytest.raises(ValueError):
+            np.ones(a) + np.ones(b)
+    else:
+        note(f"compatible: {a} {b} -> {broadcasted_shape}")
+        assert broadcasted_shape == (np.ones(a) + np.ones(b)).shape
+
+
+@st.composite
+def promotable_shapes(draw):
+    to_shape = draw(array_shapes(min_dims=2))
+
+    num_dims_to_remove = draw(st.integers(min_value=1, max_value=len(to_shape) - 1))
+    front = draw(st.booleans())
+    if front:
+        from_shape = tuple(to_shape[num_dims_to_remove:])
+    else:
+        from_shape = tuple(to_shape[: len(to_shape) - num_dims_to_remove])
+
+    try:
+        assume(broadcast_shape(from_shape, to_shape) == to_shape)
+    except ValueError:
+        assume(False)
+
+    return from_shape, to_shape
+
+
+@example(((2,), (1, 2, 3)))
+@given(shapes=promotable_shapes())
+@settings(max_examples=1000)
+def test_promote(shapes):
+    from_shape, to_shape = shapes
+
+    try:
+        broadcast_shape(from_shape, to_shape)
+    except ValueError:
+        pass
+    else:
+        promoted = list(from_shape)
+        promote(from_shape, to_shape, lambda dim, size: promoted.insert(dim, size))
+        assert tuple(promoted) == to_shape
diff --git a/legate/raft/util.py b/legate/raft/util.py
new file mode 100644
index 0000000000..b6ebb17dc8
--- /dev/null
+++ b/legate/raft/util.py
@@ -0,0 +1,70 @@
+# Copyright 2023 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+def broadcast_shape(A, B):
+    # Add additional dimensions as needed.
+    max_dim = max(len(A), len(B))
+    A_ = tuple(A[::-1][i] if i < len(A) else B[::-1][i] for i in range(max_dim))
+    B_ = tuple(B[::-1][i] if i < len(B) else A[::-1][i] for i in range(max_dim))
+    assert len(A_) == len(B_) == max_dim
+
+    # Stretch existing dimensions as needed.
+    def stretch():
+        for a, b in zip(A_, B_):
+            if a != b:
+                if 1 in (a, b):
+                    yield max(a, b)
+                else:
+                    raise ValueError(f"Unable to broadcast together {A} and {B}.")
+            else:
+                yield a
+
+    ret = tuple(stretch())
+    assert len(ret) == max_dim
+    return tuple(reversed(ret))
+
+
+def _find_sequence(sequence, sub_sequence):
+    assert len(sub_sequence) <= len(sequence)
+
+    n = len(sub_sequence)
+
+    for i in range(len(sequence) - n + 1):
+        if sequence[i : i + n] == sub_sequence:
+            return i
+    raise IndexError
+
+
+def promote(from_shape, to_shape, promote_func):
+    try:
+        assert len(to_shape) > len(from_shape)
+        bs = broadcast_shape(from_shape, to_shape)
+        for a, b in zip(to_shape, bs):
+            assert a == b
+    except AssertionError:
+        raise ValueError(f"Unable to promote {from_shape} to {to_shape}")
+
+    try:
+        idx_start = _find_sequence(to_shape, from_shape)
+        idx_stop = idx_start + len(from_shape)
+    except IndexError:
+        raise ValueError(f"Unable to promote {from_shape} to {to_shape}")
+
+    for i in range(idx_start):
+        promote_func(i, to_shape[i])
+    for i in range(idx_stop, len(to_shape)):
+        promote_func(i, to_shape[i])
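+
+# Examples of the numpy-style semantics implemented above:
+#
+#   broadcast_shape((3, 1), (3, 4))  ->  (3, 4)
+#   broadcast_shape((2,), (3,))      ->  ValueError
+#
+# ``promote`` reports, through promote_func(dim, size), every dimension that
+# has to be inserted to take from_shape to to_shape; e.g. promoting (4,) to
+# (3, 4) calls promote_func(0, 3).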
diff --git a/naive_bayes/__init__.py b/naive_bayes/__init__.py
new file mode 100644
index 0000000000..a4cc9240b7
--- /dev/null
+++ b/naive_bayes/__init__.py
@@ -0,0 +1,5 @@
+from .multinomial import MultinomialNB
+
+__all__ = [
+    "MultinomialNB",
+]
diff --git a/naive_bayes/cn/__init__.py b/naive_bayes/cn/__init__.py
new file mode 100644
index 0000000000..2e0b3eab48
--- /dev/null
+++ b/naive_bayes/cn/__init__.py
@@ -0,0 +1,18 @@
+# Copyright 2023 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from .multinomial import MultinomialNB
+
+__all__ = ["MultinomialNB"]
diff --git a/naive_bayes/cn/multinomial.py b/naive_bayes/cn/multinomial.py
new file mode 100644
index 0000000000..d587756db5
--- /dev/null
+++ b/naive_bayes/cn/multinomial.py
@@ -0,0 +1,100 @@
+# Part of this code was adapted from sklearn/naive_bayes.py which
+# is distributed under the following copyright:
+# Copyright (c) 2007–2020 The scikit-learn developers.
+# and licensed under the New BSD License (see LICENSE.scikit_learn).
+
+# import numpy as np
+import cunumeric as np
+from scipy.special import logsumexp
+from sklearn.preprocessing import LabelBinarizer
+from sklearn.utils.extmath import safe_sparse_dot
+
+
+def sparse_dot(a, b):
+    # TODO: Actually implement handling of sparse matrices.
+    #       cunumeric does not support integer types for this operation.
+    # TODO: Handle a.dtype != b.dtype
+    return np.asarray(
+        safe_sparse_dot(
+            np.asarray(a, dtype=np.float64), np.asarray(b, dtype=np.float64)
+        ),
+        dtype=a.dtype,
+    )
+
+
+def sum_(a, axis=None):
+    return np.sum(a, axis=axis)
+
+
+class MultinomialNB:
+    def __init__(self, alpha: float = 1.0, fit_prior=True, class_prior=None):
+        self.alpha = alpha
+        # NOTE: Fitting the empirical class prior is not implemented yet
+        # (see _update_class_log_prior), so fit_prior is forced to False
+        # and a uniform prior is used unless class_prior is given.
+        self.fit_prior = False
+        self.class_prior = class_prior
+
+    def fit(self, X, y):
+        # convert to cunumeric arrays
+        X = np.asarray(X)
+        y = np.asarray(y)
+        _, n_features = X.shape
+        self.n_features_in_ = n_features
+
+        labelbin = LabelBinarizer()
+        Y = np.asarray(labelbin.fit_transform(y))  # need to convert output again
+
+        self.classes_ = labelbin.classes_
+        # LabelBinarizer emits a single column for binary problems, which
+        # this implementation does not handle yet.
+        assert Y.shape[1] != 1
+
+        n_classes = Y.shape[1]
+        self._init_counters(n_classes, n_features)
+        self._count(X, Y)
+        self._update_feature_log_proba(self.alpha)
+        self._update_class_log_prior(class_prior=self.class_prior)
+
+        return self
+
+    def _count(self, X, Y):
+        self.feature_count_ += sparse_dot(Y.T, X)
+        self.class_count_ += sum_(Y, axis=0)
+
+    def _init_counters(self, n_classes, n_features):
+        self.class_count_ = np.zeros(n_classes, dtype=np.float64)
+        self.feature_count_ = np.zeros((n_classes, n_features), dtype=np.float64)
+
+    def _update_feature_log_proba(self, alpha):
+        # Adapted from sklearn/naive_bayes.py
+        smoothed_fc = self.feature_count_ + alpha
+        smoothed_cc = sum_(smoothed_fc, axis=1)
+
+        self.feature_log_prob_ = np.log(smoothed_fc) - np.log(
+            smoothed_cc.reshape(-1, 1)
+        )
+
+    def _update_class_log_prior(self, class_prior=None):
+        n_classes = len(self.classes_)
+        if class_prior is not None:
+            assert len(class_prior) == n_classes
+            self.class_log_prior_ = np.log(class_prior)
+        elif self.fit_prior:
+            raise NotImplementedError
+        else:
+            self.class_log_prior_ = np.full(n_classes, -np.log(n_classes))
+
+    def _joint_log_likelihood(self, X):
+        return sparse_dot(X, self.feature_log_prob_.T) + self.class_log_prior_
+
+    def predict_log_proba(self, X):
+        X = np.asarray(X)
+        jll = self._joint_log_likelihood(X)
+        # normalize by P(x) = P(f_1, ..., f_n)
+        log_prob_x = logsumexp(jll, axis=1)
+        return jll - np.atleast_2d(log_prob_x).T
+
+    def predict_proba(self, X):
+        X = np.asarray(X)
+        return np.exp(self.predict_log_proba(X))
+
+    def predict(self, X):
+        X = np.asarray(X)
+        jll = self._joint_log_likelihood(X)
+        return self.classes_[np.argmax(jll, axis=1)]
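+
+# Minimal usage sketch (hypothetical toy data; needs three or more classes
+# because of the binary-label limitation noted in fit):
+#
+#   X = np.asarray([[5, 0, 0], [0, 5, 0], [0, 0, 5]], dtype=np.float64)
+#   y = np.asarray([0, 1, 2])
+#   model = MultinomialNB(alpha=1.0).fit(X, y)
+#   model.predict(X)          # each row is assigned to its own class
+#   model.predict_proba(X)    # rows sum to 1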
diff --git a/naive_bayes/conftest.py b/naive_bayes/conftest.py
new file mode 100644
index 0000000000..7ac2b1fa6a
--- /dev/null
+++ b/naive_bayes/conftest.py
@@ -0,0 +1,22 @@
+import numpy as np
+import pytest
+from sklearn.datasets import fetch_20newsgroups
+from sklearn.feature_extraction.text import CountVectorizer
+
+
+def _nlp_20news():
+    try:
+        twenty_train = fetch_20newsgroups(subset="train", shuffle=True, random_state=42)
+    except Exception:
+        pytest.xfail(reason="Error fetching 20 newsgroup dataset")
+
+    count_vect = CountVectorizer()
+    X = count_vect.fit_transform(twenty_train.data)
+    Y = np.array(twenty_train.target)
+
+    return X, Y
+
+
+@pytest.fixture(scope="module")
+def nlp_20news():
+    return _nlp_20news()
diff --git a/naive_bayes/helper.py b/naive_bayes/helper.py
new file mode 100644
index 0000000000..3a9aa27a07
--- /dev/null
+++ b/naive_bayes/helper.py
@@ -0,0 +1,49 @@
+# Copyright 2023 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+from dataclasses import dataclass
+from typing import Any
+
+import cunumeric as num
+import pyarrow as pa
+
+from legate.core import Array, Store
+
+
+@dataclass
+class _StoreWrapper:
+    store: Store
+
+    @property
+    def __legate_data_interface__(self) -> dict[str, Any]:
+        """
+        Constructs a Legate data interface object from a store wrapped in
+        this object
+        """
+        dtype = self.store.type.type
+        array = Array(dtype, [None, self.store])
+
+        # Create a field metadata to populate the data field
+        field = pa.field("Array", dtype, nullable=False)
+
+        return {
+            "version": 1,
+            "data": {field: array},
+        }
+
+
+def to_cunumeric_array(store: Store) -> num.ndarray:
+    """Views a legate store as a cunumeric ndarray."""
+    return num.asarray(_StoreWrapper(store))
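+
+# Usage sketch (assuming a store produced elsewhere in this package, e.g.
+# by legate.raft.core.fill):
+#
+#   arr = to_cunumeric_array(store)
+#
+# The wrapper only exposes the store through the __legate_data_interface__
+# protocol, so the conversion is intended to avoid copying the data.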
diff --git a/naive_bayes/multinomial.py b/naive_bayes/multinomial.py
new file mode 100644
index 0000000000..57397a0fea
--- /dev/null
+++ b/naive_bayes/multinomial.py
@@ -0,0 +1,120 @@
+# Part of this code was adapted from sklearn/naive_bayes.py which
+# is distributed under the following copyright:
+# Copyright (c) 2007–2020 The scikit-learn developers.
+# and licensed under the New BSD License (see LICENSE.scikit_learn).
+
+import numpy as np
+import pyarrow as pa
+
+# import cunumeric as np
+from sklearn.preprocessing import LabelBinarizer
+
+from legate.raft import (
+    add,
+    array_to_store,
+    convert,
+    exp,
+    fill,
+    log,
+    matmul,
+    store_to_array,
+    subtract,
+    sum_over_axis,
+)
+from legate.raft.special import logsumexp
+
+
+class MultinomialNB:
+    def __init__(self, alpha: float = 1.0, fit_prior=True, class_prior=None):
+        self.alpha = alpha
+        # NOTE: Fitting the empirical class prior is not implemented yet
+        # (see _update_class_log_prior), so fit_prior is forced to False
+        # and a uniform prior is used unless class_prior is given.
+        self.fit_prior = False
+        self.class_prior = class_prior
+        self._feature_log_prob_ = None
+        self._class_log_prior_ = None
+
+    def fit(self, X, y):
+        # Convert the inputs to legate stores
+        X = array_to_store(X)
+        _, n_features = X.shape
+        self.n_features_in_ = n_features
+
+        labelbin = LabelBinarizer()
+        Y = array_to_store(labelbin.fit_transform(y))
+
+        self.classes_ = labelbin.classes_
+        # LabelBinarizer emits a single column for binary problems, which
+        # this implementation does not handle yet.
+        assert Y.shape[1] != 1
+
+        Y_transposed = Y.transpose((1, 0))
+        self.feature_count_ = matmul(Y_transposed, X)
+        self.class_count_ = sum_over_axis(Y, axis=0)
+        self._update_feature_log_proba(self.alpha)
+        self._update_class_log_prior(class_prior=self.class_prior)
+
+        return self
+
+    def _update_feature_log_proba(self, alpha, dtype=pa.float64()):
+        # Adapted from sklearn/naive_bayes.py
+
+        fc_ = convert(self.feature_count_, dtype)
+        smoothed_fc = add(fc_, alpha)
+        smoothed_cc = sum_over_axis(smoothed_fc, axis=1)
+
+        # self.feature_log_prob_ = np.log(smoothed_fc) - np.log(
+        #     smoothed_cc.reshape(-1, 1)
+        # )
+        x1 = log(smoothed_fc)
+        x2 = log(smoothed_cc)
+        self._feature_log_prob_ = subtract(x1.transpose((1, 0)), x2).transpose((1, 0))
+
+    @property
+    def feature_log_prob_(self):
+        if self._feature_log_prob_ is not None:
+            return store_to_array(self._feature_log_prob_)
+
+    @property
+    def class_log_prior_(self):
+        if self._class_log_prior_ is not None:
+            return store_to_array(self._class_log_prior_)
+
+    def _update_class_log_prior(self, class_prior=None):
+        n_classes = len(self.classes_)
+        if class_prior is not None:
+            assert len(class_prior) == n_classes
+            # class_prior arrives as a host array; move it into a store
+            # before applying the element-wise log task.
+            self._class_log_prior_ = log(array_to_store(np.asarray(class_prior)))
+        elif self.fit_prior:
+            raise NotImplementedError
+        else:
+            self._class_log_prior_ = fill((n_classes,), -np.log(n_classes))
+
+    def _joint_log_likelihood(self, X):
+        X_converted = convert(X, pa.float64())
+
+        x1 = matmul(X_converted, self._feature_log_prob_.transpose((1, 0)))
+        x2 = self._class_log_prior_
+        return add(x1, x2)
+
+    def _predict_log_proba(self, X):
+        jll = self._joint_log_likelihood(X)
+        # normalize by P(x) = P(f_1, ..., f_n)
+
+        log_prob_x = logsumexp(jll, axis=1)
+
+        x1 = jll
+        x2 = log_prob_x
+        return subtract(x1.transpose((1, 0)), x2).transpose((1, 0))
+
+    def predict_log_proba(self, X):
+        X = array_to_store(X)
+        ret = self._predict_log_proba(X)
+        return store_to_array(ret)
+
+    def predict_proba(self, X):
+        X = array_to_store(X)
+        ret = exp(self._predict_log_proba(X))
+        return store_to_array(ret)
+
+    def predict(self, X):
+        X = array_to_store(X)
+        jll = store_to_array(self._joint_log_likelihood(X))
+        ret = self.classes_[np.argmax(jll, axis=1)]
+        return ret
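+
+# The normalisation in _predict_log_proba mirrors the numpy expression
+#
+#   jll - logsumexp(jll, axis=1, keepdims=True)
+#
+# so np.exp(predict_log_proba(X)).sum(axis=1) is 1 for every row; the two
+# transposes line the (n_samples,)-shaped normaliser up with the sample axis
+# before subtract broadcasts it.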
diff --git a/naive_bayes/tests.py b/naive_bayes/tests.py
new file mode 100644
index 0000000000..303e5d8642
--- /dev/null
+++ b/naive_bayes/tests.py
@@ -0,0 +1,55 @@
+# import numpy as np
+import cunumeric as np
+from numpy.testing import assert_allclose
+from sklearn.metrics import accuracy_score
+from sklearn.naive_bayes import MultinomialNB as skNB
+
+from naive_bayes import MultinomialNB
+from naive_bayes.cn.multinomial import MultinomialNB as CNMultinomialNB
+
+
+def test_multinomial(nlp_20news):
+    X_sparse, y = nlp_20news
+
+    n_rows = 500
+    n_cols = 10000
+
+    X = X_sparse[:n_rows, :n_cols]
+    # TODO: Implement support for sparse arrays.
+    X = np.ascontiguousarray(X.todense())
+    y = y[:n_rows]
+
+    legate_model = MultinomialNB()
+    cn_legate_model = CNMultinomialNB()
+    sk_model = skNB()
+
+    sk_model.fit(X, y)
+    cn_legate_model.fit(X, y)
+    legate_model.fit(X, y)
+
+    sk_log_proba = sk_model.predict_log_proba(X)
+    legate_log_proba = legate_model.predict_log_proba(X)
+    cn_legate_log_proba = cn_legate_model.predict_log_proba(X)
+    sk_proba = sk_model.predict_proba(X)
+    legate_proba = legate_model.predict_proba(X)
+    cn_legate_proba = cn_legate_model.predict_proba(X)
+    # sk_score = sk_model.score(X, y)
+    # legate_score = legate_model.score(X, y)
+
+    y_sk = sk_model.predict(X)
+    y_legate = legate_model.predict(X)
+    y_cn_legate = cn_legate_model.predict(X)
+
+    assert_allclose(cn_legate_log_proba, sk_log_proba, atol=5e-1, rtol=5e-1)
+    assert_allclose(cn_legate_proba, sk_proba, atol=2e-1, rtol=2.5)
+    assert_allclose(legate_log_proba, sk_log_proba, atol=5e-1, rtol=5e-1)
+    assert_allclose(legate_proba, sk_proba, atol=2e-1, rtol=2.5)
+    assert accuracy_score(y, y_sk) >= 0.45
+    assert accuracy_score(y, y_cn_legate) >= 0.45
+    assert accuracy_score(y, y_legate) >= 0.45
+    print("PASS")
+
+
+if __name__ == "__main__":
+    from conftest import _nlp_20news
+
+    test_multinomial(_nlp_20news())
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000000..19d6e5c115
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,20 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+
+[versioneer]
+VCS = git
+style = pep440
+tag_prefix = v
+
+[flake8]
+max-line-length = 88
+extend-ignore = E203
+
+[isort]
+profile = black
+
+[options]
+packages = find:
+install_requires =
+    numpy>=1.22
+    # TODO: Add rest of install dependencies
+python_requires = >=3.9,!=3.9.7
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000000..2b30137323
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,26 @@
+#!/usr/bin/env python3
+
+# Copyright 2023 NVIDIA Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import setuptools
+
+setuptools.setup(
+    name="legate.raft",
+    packages=setuptools.find_packages(),
+    setup_requires=["scikit-build"],
+    cmake_args=["-DCMAKE_INSTALL_PREFIX=./install"],
+    zip_safe=False,
+)