From f3af69925e5321563dea82d8d2ae463faa45ba44 Mon Sep 17 00:00:00 2001
From: Raymond Douglass <ray@raydouglass.com>
Date: Fri, 23 Sep 2022 11:38:52 -0400
Subject: [PATCH 001/202] DOC

---
 CHANGELOG.md                             | 4 ++++
 ci/checks/style.sh                       | 2 +-
 ci/gpu/build.sh                          | 2 +-
 ci/gpu/java.sh                           | 2 +-
 conda/environments/cudf_dev_cuda11.5.yml | 4 ++--
 cpp/CMakeLists.txt                       | 2 +-
 cpp/doxygen/Doxyfile                     | 4 ++--
 cpp/examples/basic/CMakeLists.txt        | 2 +-
 cpp/libcudf_kafka/CMakeLists.txt         | 2 +-
 docs/cudf/source/conf.py                 | 4 ++--
 fetch_rapids.cmake                       | 2 +-
 java/src/main/native/CMakeLists.txt      | 2 +-
 python/cudf/CMakeLists.txt               | 2 +-
 13 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 89049dff3b6..092b62d6c63 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,7 @@
+# cuDF 22.12.00 (Date TBD)
+
+Please see https://github.com/rapidsai/cudf/releases/tag/v22.12.00a for the latest changes to this development branch.
+
 # cuDF 22.10.00 (Date TBD)
 
 Please see https://github.com/rapidsai/cudf/releases/tag/v22.10.00a for the latest changes to this development branch.
diff --git a/ci/checks/style.sh b/ci/checks/style.sh
index de3f8c01d83..29f5474fd87 100755
--- a/ci/checks/style.sh
+++ b/ci/checks/style.sh
@@ -14,7 +14,7 @@ LANG=C.UTF-8
 . /opt/conda/etc/profile.d/conda.sh
 conda activate rapids
 
-FORMAT_FILE_URL=https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-22.10/cmake-format-rapids-cmake.json
+FORMAT_FILE_URL=https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-22.12/cmake-format-rapids-cmake.json
 export RAPIDS_CMAKE_FORMAT_FILE=/tmp/rapids_cmake_ci/cmake-formats-rapids-cmake.json
 mkdir -p $(dirname ${RAPIDS_CMAKE_FORMAT_FILE})
 wget -O ${RAPIDS_CMAKE_FORMAT_FILE} ${FORMAT_FILE_URL}
diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh
index 118bdb263af..f3c302173c8 100755
--- a/ci/gpu/build.sh
+++ b/ci/gpu/build.sh
@@ -35,7 +35,7 @@ unset GIT_DESCRIBE_TAG
 export INSTALL_DASK_MAIN=1
 
 # ucx-py version
-export UCX_PY_VERSION='0.28.*'
+export UCX_PY_VERSION='0.29.*'
 
 ################################################################################
 # TRAP - Setup trap for removing jitify cache
diff --git a/ci/gpu/java.sh b/ci/gpu/java.sh
index b110303662b..e1d3bab2bc5 100755
--- a/ci/gpu/java.sh
+++ b/ci/gpu/java.sh
@@ -31,7 +31,7 @@ export GIT_DESCRIBE_TAG=`git describe --tags`
 export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'`
 
 # ucx-py version
-export UCX_PY_VERSION='0.28.*'
+export UCX_PY_VERSION='0.29.*'
 
 ################################################################################
 # TRAP - Setup trap for removing jitify cache
diff --git a/conda/environments/cudf_dev_cuda11.5.yml b/conda/environments/cudf_dev_cuda11.5.yml
index 973ca731853..c3e41927a05 100644
--- a/conda/environments/cudf_dev_cuda11.5.yml
+++ b/conda/environments/cudf_dev_cuda11.5.yml
@@ -13,7 +13,7 @@ dependencies:
   - clang=11.1.0
   - clang-tools=11.1.0
   - cupy>=9.5.0,<12.0.0a0
-  - rmm=22.10.*
+  - rmm=22.12.*
   - cmake>=3.20.1,!=3.23.0
   - cmake_setuptools>=0.1.3
   - scikit-build>=0.13.1
@@ -62,7 +62,7 @@ dependencies:
   - sphinx-autobuild
   - myst-nb
   - scipy
-  - dask-cuda=22.10.*
+  - dask-cuda=22.12.*
   - mimesis<4.1
   - packaging
   - protobuf>=3.20.1,<3.21.0a0
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 7efa186aede..6b743662e0e 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -25,7 +25,7 @@ rapids_cuda_init_architectures(CUDF)
 
 project(
   CUDF
-  VERSION 22.10.00
+  VERSION 22.12.00
   LANGUAGES C CXX CUDA
 )
 if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.5)
diff --git a/cpp/doxygen/Doxyfile b/cpp/doxygen/Doxyfile
index 871632b053d..4684e180f00 100644
--- a/cpp/doxygen/Doxyfile
+++ b/cpp/doxygen/Doxyfile
@@ -38,7 +38,7 @@ PROJECT_NAME           = "libcudf"
 # could be handy for archiving the generated documentation or if some version
 # control system is used.
 
-PROJECT_NUMBER         = 22.10.00
+PROJECT_NUMBER         = 22.12.00
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer a
@@ -2162,7 +2162,7 @@ SKIP_FUNCTION_MACROS   = YES
 # the path). If a tag file is not located in the directory in which doxygen is
 # run, you must also specify the path to the tagfile here.
 
-TAGFILES               = rmm.tag=https://docs.rapids.ai/api/librmm/22.10
+TAGFILES               = rmm.tag=https://docs.rapids.ai/api/librmm/22.12
 
 # When a file name is specified after GENERATE_TAGFILE, doxygen will create a
 # tag file that is based on the input files it reads. See section "Linking to
diff --git a/cpp/examples/basic/CMakeLists.txt b/cpp/examples/basic/CMakeLists.txt
index f4bc205d4ba..b182cb08774 100644
--- a/cpp/examples/basic/CMakeLists.txt
+++ b/cpp/examples/basic/CMakeLists.txt
@@ -16,7 +16,7 @@ file(
 )
 include(${CMAKE_BINARY_DIR}/cmake/get_cpm.cmake)
 
-set(CUDF_TAG branch-22.10)
+set(CUDF_TAG branch-22.12)
 CPMFindPackage(
   NAME cudf GIT_REPOSITORY https://github.com/rapidsai/cudf
   GIT_TAG ${CUDF_TAG}
diff --git a/cpp/libcudf_kafka/CMakeLists.txt b/cpp/libcudf_kafka/CMakeLists.txt
index 76a012e7c6e..71341277109 100644
--- a/cpp/libcudf_kafka/CMakeLists.txt
+++ b/cpp/libcudf_kafka/CMakeLists.txt
@@ -22,7 +22,7 @@ include(rapids-find)
 
 project(
   CUDA_KAFKA
-  VERSION 22.10.00
+  VERSION 22.12.00
   LANGUAGES CXX
 )
 
diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py
index db471316830..ec5b1bd2aac 100644
--- a/docs/cudf/source/conf.py
+++ b/docs/cudf/source/conf.py
@@ -85,9 +85,9 @@
 # built documents.
 #
 # The short X.Y version.
-version = '22.10'
+version = '22.12'
 # The full version, including alpha/beta/rc tags.
-release = '22.10.00'
+release = '22.12.00'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
diff --git a/fetch_rapids.cmake b/fetch_rapids.cmake
index 9e2917ffc07..cc2e201fdc3 100644
--- a/fetch_rapids.cmake
+++ b/fetch_rapids.cmake
@@ -12,7 +12,7 @@
 # the License.
 # =============================================================================
 if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/CUDF_RAPIDS.cmake)
-  file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-22.10/RAPIDS.cmake
+  file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-22.12/RAPIDS.cmake
        ${CMAKE_CURRENT_BINARY_DIR}/CUDF_RAPIDS.cmake
   )
 endif()
diff --git a/java/src/main/native/CMakeLists.txt b/java/src/main/native/CMakeLists.txt
index 9410f8eacf3..26923927378 100755
--- a/java/src/main/native/CMakeLists.txt
+++ b/java/src/main/native/CMakeLists.txt
@@ -28,7 +28,7 @@ rapids_cuda_init_architectures(CUDF_JNI)
 
 project(
   CUDF_JNI
-  VERSION 22.10.00
+  VERSION 22.12.00
   LANGUAGES C CXX CUDA
 )
 
diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt
index 72e1779401f..6dc0f1800e0 100644
--- a/python/cudf/CMakeLists.txt
+++ b/python/cudf/CMakeLists.txt
@@ -14,7 +14,7 @@
 
 cmake_minimum_required(VERSION 3.20.1 FATAL_ERROR)
 
-set(cudf_version 22.10.00)
+set(cudf_version 22.12.00)
 
 include(../../fetch_rapids.cmake)
 

From f72c4ce715080525fbf79d4298b18af862822bd7 Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Wed, 28 Sep 2022 10:04:22 -0400
Subject: [PATCH 002/202] add change from 11771

---
 python/strings_udf/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/strings_udf/CMakeLists.txt b/python/strings_udf/CMakeLists.txt
index 53d31575363..41d0d0090cb 100644
--- a/python/strings_udf/CMakeLists.txt
+++ b/python/strings_udf/CMakeLists.txt
@@ -14,7 +14,7 @@
 
 cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR)
 
-set(strings_udf_version 22.10.00)
+set(strings_udf_version 22.12.00)
 
 include(../../fetch_rapids.cmake)
 

From ec4cdd8c010736c09135edf4e35be345c1c1ada1 Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Thu, 29 Sep 2022 10:16:07 -0400
Subject: [PATCH 003/202] Fix compile warning from CUDF_FUNC_RANGE in a member
 function (#11798)

Compile warning was introduced in #11652 in `bgzip_data_chunk_source.cu`. The warning can be seen here https://gpuci.gpuopenanalytics.com/job/rapidsai/job/gpuci/job/cudf/job/prb/job/cudf-cpu-cuda-build/CUDA=11.5/12417/consoleFull (search for `177-D`)
```
/cudf/cpp/src/io/text/bgzip_data_chunk_source.cu(362): warning #177-D: variable "nvtx3_range__" was declared but never referenced
```
The `nvtx3_range__` is part of the `CUDF_FUNC_RANGE()` macro. The warning is incorrect and likely a compiler bug. The workaround in this PR is to add `[[maybe_unused]]` to the variable declaration.

I was not able to create a small reproducer suitable for filing a compiler bug report.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Tobias Ribizel (https://github.com/upsj)
  - MithunR (https://github.com/mythrocks)

URL: https://github.com/rapidsai/cudf/pull/11798
---
 cpp/include/cudf/detail/nvtx/nvtx3.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/include/cudf/detail/nvtx/nvtx3.hpp b/cpp/include/cudf/detail/nvtx/nvtx3.hpp
index fb90ea668f5..c77714181ef 100644
--- a/cpp/include/cudf/detail/nvtx/nvtx3.hpp
+++ b/cpp/include/cudf/detail/nvtx/nvtx3.hpp
@@ -1907,7 +1907,7 @@ inline void mark(event_attributes const& attr) noexcept
 #define NVTX3_FUNC_RANGE_IN(D)                                                 \
   static ::nvtx3::registered_message<D> const nvtx3_func_name__{__func__};     \
   static ::nvtx3::event_attributes const nvtx3_func_attr__{nvtx3_func_name__}; \
-  ::nvtx3::domain_thread_range<D> const nvtx3_range__{nvtx3_func_attr__};
+  [[maybe_unused]] ::nvtx3::domain_thread_range<D> const nvtx3_range__{nvtx3_func_attr__};
 
 /**
  * @brief Convenience macro for generating a range in the global domain from the

From 0b28d34658ba51f9517f6e7f240ea7aa3e2b0ed5 Mon Sep 17 00:00:00 2001
From: Vukasin Milovanovic <vmilovanovic@nvidia.com>
Date: Mon, 3 Oct 2022 16:32:00 -0700
Subject: [PATCH 004/202] Remove `cudf_io` namespace alias (#11827)

Some cuIO tests and benchmarks declare a `cudf_io` alias for `cudf::io`. The alias saves only a single character, so it is of very low utility.

This PR removes all occurrences of this alias.
It also removes a couple of builder calls that set an option to its default value.

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Bradley Dice (https://github.com/bdice)
  - Yunsong Wang (https://github.com/PointKernel)

URL: https://github.com/rapidsai/cudf/pull/11827
---
 cpp/benchmarks/io/csv/csv_writer.cpp |   15 +-
 cpp/benchmarks/io/cuio_common.cpp    |   16 +-
 cpp/tests/io/csv_test.cpp            |  450 +++++------
 cpp/tests/io/json_test.cpp           |  172 ++--
 cpp/tests/io/orc_test.cpp            |  536 ++++++-------
 cpp/tests/io/parquet_test.cpp        | 1089 +++++++++++++-------------
 6 files changed, 1133 insertions(+), 1145 deletions(-)

diff --git a/cpp/benchmarks/io/csv/csv_writer.cpp b/cpp/benchmarks/io/csv/csv_writer.cpp
index d02305cf478..5d61d81bb71 100644
--- a/cpp/benchmarks/io/csv/csv_writer.cpp
+++ b/cpp/benchmarks/io/csv/csv_writer.cpp
@@ -26,8 +26,6 @@
 constexpr size_t data_size         = 256 << 20;
 constexpr cudf::size_type num_cols = 64;
 
-namespace cudf_io = cudf::io;
-
 class CsvWrite : public cudf::benchmark {
 };
 
@@ -44,9 +42,9 @@ void BM_csv_write_varying_inout(benchmark::State& state)
   auto mem_stats_logger = cudf::memory_stats_logger();
   for (auto _ : state) {
     cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
-    cudf_io::csv_writer_options options =
-      cudf_io::csv_writer_options::builder(source_sink.make_sink_info(), view).include_header(true);
-    cudf_io::write_csv(options);
+    cudf::io::csv_writer_options options =
+      cudf::io::csv_writer_options::builder(source_sink.make_sink_info(), view);
+    cudf::io::write_csv(options);
   }
 
   state.SetBytesProcessed(data_size * state.iterations());
@@ -74,12 +72,11 @@ void BM_csv_write_varying_options(benchmark::State& state)
   auto mem_stats_logger = cudf::memory_stats_logger();
   for (auto _ : state) {
     cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
-    cudf_io::csv_writer_options options =
-      cudf_io::csv_writer_options::builder(source_sink.make_sink_info(), view)
-        .include_header(true)
+    cudf::io::csv_writer_options options =
+      cudf::io::csv_writer_options::builder(source_sink.make_sink_info(), view)
         .na_rep(na_per)
         .rows_per_chunk(rows_per_chunk);
-    cudf_io::write_csv(options);
+    cudf::io::write_csv(options);
   }
 
   state.SetBytesProcessed(data_size * state.iterations());
diff --git a/cpp/benchmarks/io/cuio_common.cpp b/cpp/benchmarks/io/cuio_common.cpp
index da64c1bbf3c..1a9c7153644 100644
--- a/cpp/benchmarks/io/cuio_common.cpp
+++ b/cpp/benchmarks/io/cuio_common.cpp
@@ -23,8 +23,6 @@
 
 #include <unistd.h>
 
-namespace cudf_io = cudf::io;
-
 temp_directory const cuio_source_sink_pair::tmpdir{"cudf_gbench"};
 
 std::string random_file_in_dir(std::string const& dir_path)
@@ -43,21 +41,21 @@ cuio_source_sink_pair::cuio_source_sink_pair(io_type type)
 {
 }
 
-cudf_io::source_info cuio_source_sink_pair::make_source_info()
+cudf::io::source_info cuio_source_sink_pair::make_source_info()
 {
   switch (type) {
-    case io_type::FILEPATH: return cudf_io::source_info(file_name);
-    case io_type::HOST_BUFFER: return cudf_io::source_info(buffer.data(), buffer.size());
+    case io_type::FILEPATH: return cudf::io::source_info(file_name);
+    case io_type::HOST_BUFFER: return cudf::io::source_info(buffer.data(), buffer.size());
     default: CUDF_FAIL("invalid input type");
   }
 }
 
-cudf_io::sink_info cuio_source_sink_pair::make_sink_info()
+cudf::io::sink_info cuio_source_sink_pair::make_sink_info()
 {
   switch (type) {
-    case io_type::VOID: return cudf_io::sink_info(&void_sink);
-    case io_type::FILEPATH: return cudf_io::sink_info(file_name);
-    case io_type::HOST_BUFFER: return cudf_io::sink_info(&buffer);
+    case io_type::VOID: return cudf::io::sink_info(&void_sink);
+    case io_type::FILEPATH: return cudf::io::sink_info(file_name);
+    case io_type::HOST_BUFFER: return cudf::io::sink_info(&buffer);
     default: CUDF_FAIL("invalid output type");
   }
 }
diff --git a/cpp/tests/io/csv_test.cpp b/cpp/tests/io/csv_test.cpp
index 4f0bdbd9b31..f532836ef95 100644
--- a/cpp/tests/io/csv_test.cpp
+++ b/cpp/tests/io/csv_test.cpp
@@ -49,8 +49,6 @@
 #include <string>
 #include <vector>
 
-namespace cudf_io = cudf::io;
-
 using cudf::data_type;
 using cudf::type_id;
 using cudf::type_to_id;
@@ -113,12 +111,12 @@ struct CsvFixedPointReaderTest : public CsvReaderTest {
                                            return acc.empty() ? rhs : (acc + "\n" + rhs);
                                          });
 
-    cudf_io::csv_reader_options in_opts =
-      cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()})
+    cudf::io::csv_reader_options in_opts =
+      cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()})
         .dtypes({data_type{type_to_id<DecimalType>(), scale}})
         .header(-1);
 
-    const auto result      = cudf_io::read_csv(in_opts);
+    const auto result      = cudf::io::read_csv(in_opts);
     const auto result_view = result.tbl->view();
 
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*input_column, result_view.column(0));
@@ -283,7 +281,7 @@ void write_csv_helper(std::string const& filename,
                       std::vector<std::string> const& names = {})
 {
   // csv_writer_options only keeps a pointer to metadata (non-owning)
-  cudf_io::table_metadata metadata{};
+  cudf::io::table_metadata metadata{};
 
   if (not names.empty()) {
     metadata.column_names = names;
@@ -297,14 +295,14 @@ void write_csv_helper(std::string const& filename,
     });
   }
 
-  cudf_io::csv_writer_options writer_options =
-    cudf_io::csv_writer_options::builder(cudf_io::sink_info(filename), table)
+  cudf::io::csv_writer_options writer_options =
+    cudf::io::csv_writer_options::builder(cudf::io::sink_info(filename), table)
       .include_header(include_header)
       .rows_per_chunk(
         1)  // Note: this gets adjusted to multiple of 8 (per legacy code logic and requirements)
       .metadata(&metadata);
 
-  cudf_io::write_csv(writer_options);
+  cudf::io::write_csv(writer_options);
 }
 
 template <typename T>
@@ -352,9 +350,9 @@ TYPED_TEST(CsvReaderNumericTypeTest, SingleColumn)
     std::copy(sequence, sequence + num_rows, output_iterator);
   }
 
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}).header(-1);
-  auto result = cudf_io::read_csv(in_opts);
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}).header(-1);
+  auto result = cudf::io::read_csv(in_opts);
 
   const auto view = result.tbl->view();
   expect_column_data_equal(std::vector<TypeParam>(sequence, sequence + num_rows), view.column(0));
@@ -407,11 +405,11 @@ TYPED_TEST(CsvFixedPointWriterTest, SingleColumnNegativeScale)
 
   auto filepath = temp_env->get_temp_dir() + "FixedPointSingleColumnNegativeScale.csv";
 
-  cudf_io::csv_writer_options writer_options =
-    cudf_io::csv_writer_options::builder(cudf_io::sink_info(filepath), input_table)
+  cudf::io::csv_writer_options writer_options =
+    cudf::io::csv_writer_options::builder(cudf::io::sink_info(filepath), input_table)
       .include_header(false);
 
-  cudf_io::write_csv(writer_options);
+  cudf::io::write_csv(writer_options);
 
   std::vector<std::string> result_strings;
   result_strings.reserve(reference_strings.size());
@@ -454,11 +452,11 @@ TYPED_TEST(CsvFixedPointWriterTest, SingleColumnPositiveScale)
 
   auto filepath = temp_env->get_temp_dir() + "FixedPointSingleColumnPositiveScale.csv";
 
-  cudf_io::csv_writer_options writer_options =
-    cudf_io::csv_writer_options::builder(cudf_io::sink_info(filepath), input_table)
+  cudf::io::csv_writer_options writer_options =
+    cudf::io::csv_writer_options::builder(cudf::io::sink_info(filepath), input_table)
       .include_header(false);
 
-  cudf_io::write_csv(writer_options);
+  cudf::io::write_csv(writer_options);
 
   std::vector<std::string> result_strings;
   result_strings.reserve(reference_strings.size());
@@ -500,8 +498,8 @@ TEST_F(CsvReaderTest, MultiColumn)
     outfile << line.str();
   }
 
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
       .header(-1)
       .dtypes({dtype<int8_t>(),
                dtype<int16_t>(),
@@ -513,7 +511,7 @@ TEST_F(CsvReaderTest, MultiColumn)
                dtype<uint64_t>(),
                dtype<float>(),
                dtype<double>()});
-  auto result = cudf_io::read_csv(in_opts);
+  auto result = cudf::io::read_csv(in_opts);
 
   const auto view = result.tbl->view();
   expect_column_data_equal(int8_values, view.column(0));
@@ -548,14 +546,14 @@ TEST_F(CsvReaderTest, RepeatColumn)
   }
 
   // repeats column in indexes and names, misses 1 column.
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
       .dtypes({dtype<int16_t>(), dtype<int64_t>(), dtype<uint64_t>(), dtype<float>()})
       .names({"A", "B", "C", "D"})
       .use_cols_indexes({1, 0, 0})
       .use_cols_names({"D", "B", "B"})
       .header(-1);
-  auto result = cudf_io::read_csv(in_opts);
+  auto result = cudf::io::read_csv(in_opts);
 
   const auto view = result.tbl->view();
   EXPECT_EQ(3, view.num_columns());
@@ -573,14 +571,14 @@ TEST_F(CsvReaderTest, Booleans)
                "true\nYes,5,foo,false\n";
   }
 
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
       .names({"A", "B", "C", "D"})
       .dtypes({dtype<int32_t>(), dtype<int32_t>(), dtype<int16_t>(), dtype<bool>()})
       .true_values({"yes", "Yes", "YES", "foo", "FOO"})
       .false_values({"no", "No", "NO", "Bar", "bar"})
       .header(-1);
-  auto result = cudf_io::read_csv(in_opts);
+  auto result = cudf::io::read_csv(in_opts);
 
   // Booleans are the same (integer) data type, but valued at 0 or 1
   const auto view = result.tbl->view();
@@ -605,13 +603,13 @@ TEST_F(CsvReaderTest, Dates)
     outfile << "16/09/2005T1:2:30.400PM\n2/2/1970\n";
   }
 
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
       .names({"A"})
       .dtypes({data_type{type_id::TIMESTAMP_MILLISECONDS}})
       .dayfirst(true)
       .header(-1);
-  auto result = cudf_io::read_csv(in_opts);
+  auto result = cudf::io::read_csv(in_opts);
 
   const auto view = result.tbl->view();
   EXPECT_EQ(1, view.num_columns());
@@ -641,13 +639,13 @@ TEST_F(CsvReaderTest, DatesCastToTimestampSeconds)
     outfile << "16/09/2005T1:2:30.400PM\n2/2/1970\n";
   }
 
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
       .names({"A"})
       .dtypes({data_type{type_id::TIMESTAMP_SECONDS}})
       .dayfirst(true)
       .header(-1);
-  auto result = cudf_io::read_csv(in_opts);
+  auto result = cudf::io::read_csv(in_opts);
 
   const auto view = result.tbl->view();
   EXPECT_EQ(1, view.num_columns());
@@ -677,13 +675,13 @@ TEST_F(CsvReaderTest, DatesCastToTimestampMilliSeconds)
     outfile << "16/09/2005T1:2:30.400PM\n2/2/1970\n";
   }
 
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
       .names({"A"})
       .dtypes({data_type{type_id::TIMESTAMP_MILLISECONDS}})
       .dayfirst(true)
       .header(-1);
-  auto result = cudf_io::read_csv(in_opts);
+  auto result = cudf::io::read_csv(in_opts);
 
   const auto view = result.tbl->view();
   EXPECT_EQ(1, view.num_columns());
@@ -713,13 +711,13 @@ TEST_F(CsvReaderTest, DatesCastToTimestampMicroSeconds)
     outfile << "16/09/2005T1:2:30.400PM\n2/2/1970\n";
   }
 
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
       .names({"A"})
       .dtypes({data_type{type_id::TIMESTAMP_MICROSECONDS}})
       .dayfirst(true)
       .header(-1);
-  auto result = cudf_io::read_csv(in_opts);
+  auto result = cudf::io::read_csv(in_opts);
 
   const auto view = result.tbl->view();
   EXPECT_EQ(1, view.num_columns());
@@ -749,13 +747,13 @@ TEST_F(CsvReaderTest, DatesCastToTimestampNanoSeconds)
     outfile << "16/09/2005T1:2:30.400PM\n2/2/1970\n";
   }
 
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
       .names({"A"})
       .dtypes({data_type{type_id::TIMESTAMP_NANOSECONDS}})
       .dayfirst(true)
       .header(-1);
-  auto result = cudf_io::read_csv(in_opts);
+  auto result = cudf::io::read_csv(in_opts);
 
   const auto view = result.tbl->view();
   EXPECT_EQ(1, view.num_columns());
@@ -789,12 +787,12 @@ TEST_F(CsvReaderTest, IntegersCastToTimestampSeconds)
     }
   }
 
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
       .names({"A"})
       .dtypes({data_type{type_id::TIMESTAMP_SECONDS}})
       .header(-1);
-  auto result = cudf_io::read_csv(in_opts);
+  auto result = cudf::io::read_csv(in_opts);
 
   const auto view = result.tbl->view();
   EXPECT_EQ(1, view.num_columns());
@@ -817,12 +815,12 @@ TEST_F(CsvReaderTest, IntegersCastToTimestampMilliSeconds)
     }
   }
 
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
       .names({"A"})
       .dtypes({data_type{type_id::TIMESTAMP_MILLISECONDS}})
       .header(-1);
-  auto result = cudf_io::read_csv(in_opts);
+  auto result = cudf::io::read_csv(in_opts);
 
   const auto view = result.tbl->view();
   EXPECT_EQ(1, view.num_columns());
@@ -845,12 +843,12 @@ TEST_F(CsvReaderTest, IntegersCastToTimestampMicroSeconds)
     }
   }
 
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
       .names({"A"})
       .dtypes({data_type{type_id::TIMESTAMP_MICROSECONDS}})
       .header(-1);
-  auto result = cudf_io::read_csv(in_opts);
+  auto result = cudf::io::read_csv(in_opts);
 
   const auto view = result.tbl->view();
   EXPECT_EQ(1, view.num_columns());
@@ -873,12 +871,12 @@ TEST_F(CsvReaderTest, IntegersCastToTimestampNanoSeconds)
     }
   }
 
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
       .names({"A"})
       .dtypes({data_type{type_id::TIMESTAMP_NANOSECONDS}})
       .header(-1);
-  auto result = cudf_io::read_csv(in_opts);
+  auto result = cudf::io::read_csv(in_opts);
 
   const auto view = result.tbl->view();
   EXPECT_EQ(1, view.num_columns());
@@ -897,13 +895,13 @@ TEST_F(CsvReaderTest, FloatingPoint)
                "98007199999998;";
   }
 
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
       .names({"A"})
       .dtypes({dtype<float>()})
       .lineterminator(';')
       .header(-1);
-  auto result = cudf_io::read_csv(in_opts);
+  auto result = cudf::io::read_csv(in_opts);
 
   const auto view = result.tbl->view();
   EXPECT_EQ(1, view.num_columns());
@@ -930,12 +928,12 @@ TEST_F(CsvReaderTest, Strings)
     outfile << "30,stu \"\"vwx\"\" yz" << '\n';
   }
 
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
       .names(names)
       .dtypes(std::vector<data_type>{dtype<int32_t>(), dtype<cudf::string_view>()})
-      .quoting(cudf_io::quote_style::NONE);
-  auto result = cudf_io::read_csv(in_opts);
+      .quoting(cudf::io::quote_style::NONE);
+  auto result = cudf::io::read_csv(in_opts);
 
   const auto view = result.tbl->view();
   EXPECT_EQ(2, view.num_columns());
@@ -960,12 +958,12 @@ TEST_F(CsvReaderTest, StringsQuotes)
     outfile << "30,stu `vwx` yz" << '\n';
   }
 
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
       .names(names)
       .dtypes(std::vector<data_type>{dtype<int32_t>(), dtype<cudf::string_view>()})
       .quotechar('`');
-  auto result = cudf_io::read_csv(in_opts);
+  auto result = cudf::io::read_csv(in_opts);
 
   const auto view = result.tbl->view();
   EXPECT_EQ(2, view.num_columns());
@@ -989,13 +987,13 @@ TEST_F(CsvReaderTest, StringsQuotesIgnored)
     outfile << "30,stu \"vwx\" yz" << '\n';
   }
 
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
       .names(names)
       .dtypes(std::vector<data_type>{dtype<int32_t>(), dtype<cudf::string_view>()})
-      .quoting(cudf_io::quote_style::NONE)
+      .quoting(cudf::io::quote_style::NONE)
       .doublequote(false);
-  auto result = cudf_io::read_csv(in_opts);
+  auto result = cudf::io::read_csv(in_opts);
 
   const auto view = result.tbl->view();
   EXPECT_EQ(2, view.num_columns());
@@ -1015,14 +1013,14 @@ TEST_F(CsvReaderTest, SkiprowsNrows)
     outfile << "1\n2\n3\n4\n5\n6\n7\n8\n9\n";
   }
 
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
       .names({"A"})
       .dtypes({dtype<int32_t>()})
       .header(1)
       .skiprows(2)
       .nrows(2);
-  auto result = cudf_io::read_csv(in_opts);
+  auto result = cudf::io::read_csv(in_opts);
 
   const auto view = result.tbl->view();
   EXPECT_EQ(1, view.num_columns());
@@ -1039,14 +1037,14 @@ TEST_F(CsvReaderTest, ByteRange)
     outfile << "1000\n2000\n3000\n4000\n5000\n6000\n7000\n8000\n9000\n";
   }
 
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
       .names({"A"})
       .dtypes({dtype<int32_t>()})
       .header(-1)
       .byte_range_offset(11)
       .byte_range_size(15);
-  auto result = cudf_io::read_csv(in_opts);
+  auto result = cudf::io::read_csv(in_opts);
 
   const auto view = result.tbl->view();
   EXPECT_EQ(1, view.num_columns());
@@ -1058,13 +1056,13 @@ TEST_F(CsvReaderTest, ByteRange)
 TEST_F(CsvReaderTest, ByteRangeStrings)
 {
   std::string input = "\"a\"\n\"b\"\n\"c\"";
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{input.c_str(), input.size()})
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{input.c_str(), input.size()})
       .names({"A"})
       .dtypes({dtype<cudf::string_view>()})
       .header(-1)
       .byte_range_offset(4);
-  auto result = cudf_io::read_csv(in_opts);
+  auto result = cudf::io::read_csv(in_opts);
 
   const auto view = result.tbl->view();
   EXPECT_EQ(1, view.num_columns());
@@ -1081,13 +1079,13 @@ TEST_F(CsvReaderTest, BlanksAndComments)
     outfile << "1\n#blank\n3\n4\n5\n#blank\n\n\n8\n9\n";
   }
 
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
       .names({"A"})
       .dtypes({dtype<int32_t>()})
       .header(-1)
       .comment('#');
-  auto result = cudf_io::read_csv(in_opts);
+  auto result = cudf::io::read_csv(in_opts);
 
   const auto view = result.tbl->view();
   EXPECT_EQ(1, view.num_columns());
@@ -1104,9 +1102,9 @@ TEST_F(CsvReaderTest, EmptyFile)
     outfile << "";
   }
 
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_csv(in_opts);
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_csv(in_opts);
 
   const auto view = result.tbl->view();
   EXPECT_EQ(0, view.num_columns());
@@ -1120,9 +1118,9 @@ TEST_F(CsvReaderTest, NoDataFile)
     outfile << "\n\n";
   }
 
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_csv(in_opts);
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_csv(in_opts);
 
   const auto view = result.tbl->view();
   EXPECT_EQ(0, view.num_columns());
@@ -1136,9 +1134,9 @@ TEST_F(CsvReaderTest, HeaderOnlyFile)
     outfile << "\"a\",\"b\",\"c\"\n\n";
   }
 
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_csv(in_opts);
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_csv(in_opts);
 
   const auto view = result.tbl->view();
   EXPECT_EQ(0, view.num_rows());
@@ -1156,11 +1154,11 @@ TEST_F(CsvReaderTest, ArrowFileSource)
   std::shared_ptr<arrow::io::ReadableFile> infile;
   ASSERT_TRUE(arrow::io::ReadableFile::Open(filepath).Value(&infile).ok());
 
-  auto arrow_source = cudf_io::arrow_io_source{infile};
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{&arrow_source})
+  auto arrow_source = cudf::io::arrow_io_source{infile};
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{&arrow_source})
       .dtypes({dtype<int8_t>()});
-  auto result = cudf_io::read_csv(in_opts);
+  auto result = cudf::io::read_csv(in_opts);
 
   const auto view = result.tbl->view();
   EXPECT_EQ(1, view.num_columns());
@@ -1177,12 +1175,12 @@ TEST_F(CsvReaderTest, InvalidFloatingPoint)
     outfile << "1.2e1+\n3.4e2-\n5.6e3e\n7.8e3A\n9.0Be1\n1C.2";
   }
 
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
       .names({"A"})
       .dtypes({dtype<float>()})
       .header(-1);
-  const auto result = cudf_io::read_csv(in_opts);
+  const auto result = cudf::io::read_csv(in_opts);
 
   const auto view = result.tbl->view();
   EXPECT_EQ(1, view.num_columns());
@@ -1199,10 +1197,10 @@ TEST_F(CsvReaderTest, InvalidFloatingPoint)
 TEST_F(CsvReaderTest, StringInference)
 {
   std::string buffer = "\"-1\"\n";
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()})
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()})
       .header(-1);
-  const auto result = cudf_io::read_csv(in_opts);
+  const auto result = cudf::io::read_csv(in_opts);
 
   EXPECT_EQ(result.tbl->num_columns(), 1);
   EXPECT_EQ(result.tbl->get_column(0).type().id(), type_id::STRING);
@@ -1211,11 +1209,11 @@ TEST_F(CsvReaderTest, StringInference)
 TEST_F(CsvReaderTest, TypeInferenceThousands)
 {
   std::string buffer = "1`400,123,1`234.56\n123`456,123456,12.34";
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()})
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()})
       .header(-1)
       .thousands('`');
-  const auto result      = cudf_io::read_csv(in_opts);
+  const auto result      = cudf::io::read_csv(in_opts);
   const auto result_view = result.tbl->view();
 
   EXPECT_EQ(result_view.num_columns(), 3);
@@ -1238,12 +1236,12 @@ TEST_F(CsvReaderTest, TypeInferenceWithDecimal)
   // col#1 => STRING (contains digits and period character, which is NOT the decimal point here)
   // col#2 => FLOAT64 (column contains digits and decimal point (i.e., ';'))
   std::string buffer = "1`400,1.23,1`234;56\n123`456,123.456,12;34";
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()})
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()})
       .header(-1)
       .thousands('`')
       .decimal(';');
-  const auto result      = cudf_io::read_csv(in_opts);
+  const auto result      = cudf::io::read_csv(in_opts);
   const auto result_view = result.tbl->view();
 
   EXPECT_EQ(result_view.num_columns(), 3);
@@ -1263,17 +1261,17 @@ TEST_F(CsvReaderTest, SkipRowsXorSkipFooter)
 {
   std::string buffer = "1,2,3";
 
-  cudf_io::csv_reader_options skiprows_options =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()})
+  cudf::io::csv_reader_options skiprows_options =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()})
       .header(-1)
       .skiprows(1);
-  EXPECT_NO_THROW(cudf_io::read_csv(skiprows_options));
+  EXPECT_NO_THROW(cudf::io::read_csv(skiprows_options));
 
-  cudf_io::csv_reader_options skipfooter_options =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()})
+  cudf::io::csv_reader_options skipfooter_options =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()})
       .header(-1)
       .skipfooter(1);
-  EXPECT_NO_THROW(cudf_io::read_csv(skipfooter_options));
+  EXPECT_NO_THROW(cudf::io::read_csv(skipfooter_options));
 }
 
 TEST_F(CsvReaderTest, nullHandling)
@@ -1286,13 +1284,13 @@ TEST_F(CsvReaderTest, nullHandling)
 
   // Test disabling na_filter
   {
-    cudf_io::csv_reader_options in_opts =
-      cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+    cudf::io::csv_reader_options in_opts =
+      cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
         .na_filter(false)
         .dtypes({dtype<cudf::string_view>()})
         .header(-1)
         .skip_blank_lines(false);
-    const auto result = cudf_io::read_csv(in_opts);
+    const auto result = cudf::io::read_csv(in_opts);
     const auto view   = result.tbl->view();
     auto expect =
       cudf::test::strings_column_wrapper({"NULL", "", "null", "n/a", "Null", "NA", "nan"});
@@ -1301,12 +1299,12 @@ TEST_F(CsvReaderTest, nullHandling)
 
   // Test enabling na_filter
   {
-    cudf_io::csv_reader_options in_opts =
-      cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+    cudf::io::csv_reader_options in_opts =
+      cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
         .dtypes({dtype<cudf::string_view>()})
         .header(-1)
         .skip_blank_lines(false);
-    const auto result = cudf_io::read_csv(in_opts);
+    const auto result = cudf::io::read_csv(in_opts);
     const auto view   = result.tbl->view();
     auto expect =
       cudf::test::strings_column_wrapper({"NULL", "", "null", "n/a", "Null", "NA", "nan"},
@@ -1317,13 +1315,13 @@ TEST_F(CsvReaderTest, nullHandling)
 
   // Setting na_values with default values
   {
-    cudf_io::csv_reader_options in_opts =
-      cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+    cudf::io::csv_reader_options in_opts =
+      cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
         .na_values({"Null"})
         .dtypes({dtype<cudf::string_view>()})
         .header(-1)
         .skip_blank_lines(false);
-    const auto result = cudf_io::read_csv(in_opts);
+    const auto result = cudf::io::read_csv(in_opts);
     const auto view   = result.tbl->view();
     auto expect =
       cudf::test::strings_column_wrapper({"NULL", "", "null", "n/a", "Null", "NA", "nan"},
@@ -1334,14 +1332,14 @@ TEST_F(CsvReaderTest, nullHandling)
 
   // Setting na_values without default values
   {
-    cudf_io::csv_reader_options in_opts =
-      cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+    cudf::io::csv_reader_options in_opts =
+      cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
         .keep_default_na(false)
         .na_values({"Null"})
         .dtypes({dtype<cudf::string_view>()})
         .header(-1)
         .skip_blank_lines(false);
-    const auto result = cudf_io::read_csv(in_opts);
+    const auto result = cudf::io::read_csv(in_opts);
     const auto view   = result.tbl->view();
     auto expect =
       cudf::test::strings_column_wrapper({"NULL", "", "null", "n/a", "Null", "NA", "nan"},
@@ -1356,14 +1354,14 @@ TEST_F(CsvReaderTest, FailCases)
   std::string buffer = "1,2,3";
   {
     EXPECT_THROW(
-      cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()})
+      cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()})
         .byte_range_offset(4)
         .skiprows(1),
       cudf::logic_error);
   }
   {
     EXPECT_THROW(
-      cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()})
+      cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()})
         .byte_range_offset(4)
         .skipfooter(1),
       cudf::logic_error);
@@ -1371,77 +1369,77 @@ TEST_F(CsvReaderTest, FailCases)
 
   {
     EXPECT_THROW(
-      cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()})
+      cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()})
         .byte_range_offset(4)
         .nrows(1),
       cudf::logic_error);
   }
   {
     EXPECT_THROW(
-      cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()})
+      cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()})
         .byte_range_size(4)
         .skiprows(1),
       cudf::logic_error);
   }
   {
     EXPECT_THROW(
-      cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()})
+      cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()})
         .byte_range_size(4)
         .skipfooter(1),
       cudf::logic_error);
   }
   {
     EXPECT_THROW(
-      cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()})
+      cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()})
         .byte_range_size(4)
         .nrows(1),
       cudf::logic_error);
   }
   {
     EXPECT_THROW(
-      cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()})
+      cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()})
         .skiprows(1)
         .byte_range_offset(4),
       cudf::logic_error);
   }
   {
     EXPECT_THROW(
-      cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()})
+      cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()})
         .skipfooter(1)
         .byte_range_offset(4),
       cudf::logic_error);
   }
   {
     EXPECT_THROW(
-      cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()})
+      cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()})
         .nrows(1)
         .byte_range_offset(4),
       cudf::logic_error);
   }
   {
     EXPECT_THROW(
-      cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()})
+      cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()})
         .skiprows(1)
         .byte_range_size(4),
       cudf::logic_error);
   }
   {
     EXPECT_THROW(
-      cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()})
+      cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()})
         .skipfooter(1)
         .byte_range_size(4),
       cudf::logic_error);
   }
   {
     EXPECT_THROW(
-      cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()})
+      cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()})
         .nrows(1)
         .byte_range_size(4),
       cudf::logic_error);
   }
   {
     EXPECT_THROW(
-      cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()})
+      cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()})
         .nrows(1)
         .skipfooter(1),
       cudf::logic_error);
@@ -1449,14 +1447,14 @@ TEST_F(CsvReaderTest, FailCases)
   }
   {
     EXPECT_THROW(
-      cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()})
+      cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()})
         .skipfooter(1)
         .nrows(1),
       cudf::logic_error);
   }
   {
     EXPECT_THROW(
-      cudf_io::csv_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()})
+      cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()})
         .na_filter(false)
         .na_values({"Null"}),
       cudf::logic_error);
@@ -1472,13 +1470,13 @@ TEST_F(CsvReaderTest, HexTest)
   }
   // specify hex columns by name
   {
-    cudf_io::csv_reader_options in_opts =
-      cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+    cudf::io::csv_reader_options in_opts =
+      cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
         .names({"A"})
         .dtypes({dtype<int64_t>()})
         .header(-1)
         .parse_hex({"A"});
-    auto result = cudf_io::read_csv(in_opts);
+    auto result = cudf::io::read_csv(in_opts);
 
     expect_column_data_equal(
       std::vector<int64_t>{0, -4096, 16702650, 11259375, 11259375, 2501034507},
@@ -1487,13 +1485,13 @@ TEST_F(CsvReaderTest, HexTest)
 
   // specify hex columns by index
   {
-    cudf_io::csv_reader_options in_opts =
-      cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+    cudf::io::csv_reader_options in_opts =
+      cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
         .names({"A"})
         .dtypes({dtype<int64_t>()})
         .header(-1)
         .parse_hex(std::vector<int>{0});
-    auto result = cudf_io::read_csv(in_opts);
+    auto result = cudf::io::read_csv(in_opts);
 
     expect_column_data_equal(
       std::vector<int64_t>{0, -4096, 16702650, 11259375, 11259375, 2501034507},
@@ -1513,9 +1511,9 @@ TYPED_TEST(CsvReaderNumericTypeTest, SingleColumnWithWriter)
 
   write_csv_helper(filepath, input_table, false);
 
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}).header(-1);
-  auto result = cudf_io::read_csv(in_opts);
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}).header(-1);
+  auto result = cudf::io::read_csv(in_opts);
 
   const auto result_table = result.tbl->view();
   CUDF_TEST_EXPECT_TABLES_EQUIVALENT(input_table, result_table);
@@ -1581,8 +1579,8 @@ TEST_F(CsvReaderTest, MultiColumnWithWriter)
 
   write_csv_helper(filepath, input_table, false);
 
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
       .header(-1)
       .dtypes({dtype<int8_t>(),
                dtype<int16_t>(),
@@ -1594,7 +1592,7 @@ TEST_F(CsvReaderTest, MultiColumnWithWriter)
                dtype<uint64_t>(),
                dtype<float>(),
                dtype<double>()});
-  auto result = cudf_io::read_csv(in_opts);
+  auto result = cudf::io::read_csv(in_opts);
 
   const auto result_table = result.tbl->view();
 
@@ -1629,13 +1627,13 @@ TEST_F(CsvReaderTest, DatesWithWriter)
   // TODO need to add a dayfirst flag?
   write_csv_helper(filepath, input_table, false);
 
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
       .names({"A"})
       .dtypes({data_type{type_id::TIMESTAMP_MILLISECONDS}})
       .dayfirst(true)
       .header(-1);
-  auto result = cudf_io::read_csv(in_opts);
+  auto result = cudf::io::read_csv(in_opts);
 
   const auto result_table = result.tbl->view();
 
@@ -1654,9 +1652,11 @@ TEST_F(CsvReaderTest, DatesStringWithWriter)
 
     write_csv_helper(filepath, input_table, false);
 
-    cudf_io::csv_reader_options in_opts =
-      cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}).names({"A"}).header(-1);
-    auto result = cudf_io::read_csv(in_opts);
+    cudf::io::csv_reader_options in_opts =
+      cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
+        .names({"A"})
+        .header(-1);
+    auto result = cudf::io::read_csv(in_opts);
 
     const auto result_table = result.tbl->view();
 
@@ -1675,9 +1675,11 @@ TEST_F(CsvReaderTest, DatesStringWithWriter)
 
     write_csv_helper(filepath, input_table, false);
 
-    cudf_io::csv_reader_options in_opts =
-      cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}).names({"A"}).header(-1);
-    auto result = cudf_io::read_csv(in_opts);
+    cudf::io::csv_reader_options in_opts =
+      cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
+        .names({"A"})
+        .header(-1);
+    auto result = cudf::io::read_csv(in_opts);
 
     const auto result_table = result.tbl->view();
 
@@ -1696,9 +1698,11 @@ TEST_F(CsvReaderTest, DatesStringWithWriter)
 
     write_csv_helper(filepath, input_table, false);
 
-    cudf_io::csv_reader_options in_opts =
-      cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}).names({"A"}).header(-1);
-    auto result = cudf_io::read_csv(in_opts);
+    cudf::io::csv_reader_options in_opts =
+      cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
+        .names({"A"})
+        .header(-1);
+    auto result = cudf::io::read_csv(in_opts);
 
     const auto result_table = result.tbl->view();
 
@@ -1718,9 +1722,11 @@ TEST_F(CsvReaderTest, DatesStringWithWriter)
 
     write_csv_helper(filepath, input_table, false);
 
-    cudf_io::csv_reader_options in_opts =
-      cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}).names({"A"}).header(-1);
-    auto result = cudf_io::read_csv(in_opts);
+    cudf::io::csv_reader_options in_opts =
+      cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
+        .names({"A"})
+        .header(-1);
+    auto result = cudf::io::read_csv(in_opts);
 
     const auto result_table = result.tbl->view();
 
@@ -1739,9 +1745,11 @@ TEST_F(CsvReaderTest, DatesStringWithWriter)
 
     write_csv_helper(filepath, input_table, false);
 
-    cudf_io::csv_reader_options in_opts =
-      cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}).names({"A"}).header(-1);
-    auto result = cudf_io::read_csv(in_opts);
+    cudf::io::csv_reader_options in_opts =
+      cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
+        .names({"A"})
+        .header(-1);
+    auto result = cudf::io::read_csv(in_opts);
 
     const auto result_table = result.tbl->view();
 
@@ -1760,13 +1768,13 @@ TEST_F(CsvReaderTest, FloatingPointWithWriter)
   // TODO add lineterminator=";"
   write_csv_helper(filepath, input_table, false);
 
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
       .names({"A"})
       .dtypes({dtype<double>()})
       .header(-1);
   // in_opts.lineterminator = ';';
-  auto result = cudf_io::read_csv(in_opts);
+  auto result = cudf::io::read_csv(in_opts);
 
   const auto result_table = result.tbl->view();
   CUDF_TEST_EXPECT_TABLES_EQUIVALENT(input_table, result_table);
@@ -1786,12 +1794,12 @@ TEST_F(CsvReaderTest, StringsWithWriter)
   // TODO add quoting style flag?
   write_csv_helper(filepath, input_table, true, names);
 
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
       .names(names)
       .dtypes(std::vector<data_type>{dtype<int32_t>(), dtype<cudf::string_view>()})
-      .quoting(cudf_io::quote_style::NONE);
-  auto result = cudf_io::read_csv(in_opts);
+      .quoting(cudf::io::quote_style::NONE);
+  auto result = cudf::io::read_csv(in_opts);
 
   const auto result_table = result.tbl->view();
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(input_table.column(0), result_table.column(0));
@@ -1811,12 +1819,12 @@ TEST_F(CsvReaderTest, StringsWithWriterSimple)
   // TODO add quoting style flag?
   write_csv_helper(filepath, input_table, true, names);
 
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
       .names(names)
       .dtypes(std::vector<data_type>{dtype<int32_t>(), dtype<cudf::string_view>()})
-      .quoting(cudf_io::quote_style::NONE);
-  auto result = cudf_io::read_csv(in_opts);
+      .quoting(cudf::io::quote_style::NONE);
+  auto result = cudf::io::read_csv(in_opts);
 
   const auto result_table = result.tbl->view();
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(input_table.column(0), result_table.column(0));
@@ -1835,11 +1843,11 @@ TEST_F(CsvReaderTest, StringsEmbeddedDelimiter)
 
   write_csv_helper(filepath, input_table, true, names);
 
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
       .names(names)
       .dtypes(std::vector<data_type>{dtype<int32_t>(), dtype<cudf::string_view>()});
-  auto result = cudf_io::read_csv(in_opts);
+  auto result = cudf::io::read_csv(in_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUIVALENT(input_table, result.tbl->view());
 }
@@ -1858,15 +1866,15 @@ TEST_F(CsvReaderTest, HeaderEmbeddedDelimiter)
 
   write_csv_helper(filepath, input_table, true, names);
 
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
       .names(names)
       .dtypes({dtype<int32_t>(),
                dtype<cudf::string_view>(),
                dtype<int32_t>(),
                dtype<int32_t>(),
                dtype<int32_t>()});
-  auto result = cudf_io::read_csv(in_opts);
+  auto result = cudf::io::read_csv(in_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUIVALENT(input_table, result.tbl->view());
 }
@@ -1877,9 +1885,9 @@ TEST_F(CsvReaderTest, EmptyFileWithWriter)
 
   cudf::table_view empty_table;
   write_csv_helper(filepath, empty_table, false);
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_csv(in_opts);
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_csv(in_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUIVALENT(empty_table, result.tbl->view());
 }
@@ -1918,11 +1926,11 @@ TEST_F(CsvReaderTest, UserImplementedSource)
              << "\n";
   }
   TestSource source{csv_data.str()};
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{&source})
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{&source})
       .dtypes({dtype<int8_t>(), dtype<int16_t>(), dtype<int32_t>()})
       .header(-1);
-  auto result = cudf_io::read_csv(in_opts);
+  auto result = cudf::io::read_csv(in_opts);
 
   auto const view = result.tbl->view();
   expect_column_data_equal(int8_values, view.column(0));
@@ -1962,15 +1970,15 @@ TEST_F(CsvReaderTest, DurationsWithWriter)
 
   write_csv_helper(filepath, input_table, true, names);
 
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
       .names(names)
       .dtypes({data_type{type_id::DURATION_DAYS},
                data_type{type_id::DURATION_SECONDS},
                data_type{type_id::DURATION_MILLISECONDS},
                data_type{type_id::DURATION_MICROSECONDS},
                data_type{type_id::DURATION_NANOSECONDS}});
-  auto result = cudf_io::read_csv(in_opts);
+  auto result = cudf::io::read_csv(in_opts);
 
   const auto result_table = result.tbl->view();
   CUDF_TEST_EXPECT_TABLES_EQUIVALENT(input_table, result_table);
@@ -2038,9 +2046,9 @@ TEST_F(CsvReaderTest, ParseInRangeIntegers)
 
   write_csv_helper(filepath, input_table, false);
 
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}).header(-1);
-  auto result = cudf_io::read_csv(in_opts);
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}).header(-1);
+  auto result = cudf::io::read_csv(in_opts);
 
   const auto view = result.tbl->view();
 
@@ -2117,9 +2125,9 @@ TEST_F(CsvReaderTest, ParseOutOfRangeIntegers)
 
   write_csv_helper(filepath, input_table, false);
 
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}).header(-1);
-  auto result = cudf_io::read_csv(in_opts);
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}).header(-1);
+  auto result = cudf::io::read_csv(in_opts);
 
   const auto view = result.tbl->view();
 
@@ -2148,9 +2156,9 @@ TEST_F(CsvReaderTest, ReadMaxNumericValue)
     std::copy(sequence, sequence + num_rows, output_iterator);
   }
 
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath}).header(-1);
-  auto result = cudf_io::read_csv(in_opts);
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}).header(-1);
+  auto result = cudf::io::read_csv(in_opts);
 
   const auto view = result.tbl->view();
   expect_column_data_equal(std::vector<uint64_t>(sequence, sequence + num_rows), view.column(0));
@@ -2164,8 +2172,8 @@ TEST_F(CsvReaderTest, DefaultWriteChunkSize)
     auto input_column = column_wrapper<int32_t>(sequence, sequence + num_rows);
     auto input_table  = cudf::table_view{std::vector<cudf::column_view>{input_column}};
 
-    cudf_io::csv_writer_options opts =
-      cudf_io::csv_writer_options::builder(cudf_io::sink_info{"unused.path"}, input_table);
+    cudf::io::csv_writer_options opts =
+      cudf::io::csv_writer_options::builder(cudf::io::sink_info{"unused.path"}, input_table);
     ASSERT_EQ(num_rows, opts.get_rows_per_chunk());
   }
 }
@@ -2174,12 +2182,12 @@ TEST_F(CsvReaderTest, DtypesMap)
 {
   std::string csv_in{"12,9\n34,8\n56,7"};
 
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{csv_in.c_str(), csv_in.size()})
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{csv_in.c_str(), csv_in.size()})
       .names({"A", "B"})
       .dtypes({{"B", dtype<int16_t>()}, {"A", dtype<int32_t>()}})
       .header(-1);
-  auto result = cudf_io::read_csv(in_opts);
+  auto result = cudf::io::read_csv(in_opts);
 
   const auto result_table = result.tbl->view();
   ASSERT_EQ(result_table.num_columns(), 2);
@@ -2191,12 +2199,12 @@ TEST_F(CsvReaderTest, DtypesMap)
 
 TEST_F(CsvReaderTest, DtypesMapPartial)
 {
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{nullptr, 0})
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{nullptr, 0})
       .names({"A", "B"})
       .dtypes({{"A", dtype<int16_t>()}});
   {
-    auto result = cudf_io::read_csv(in_opts);
+    auto result = cudf::io::read_csv(in_opts);
 
     const auto view = result.tbl->view();
     ASSERT_EQ(type_id::INT16, view.column(0).type().id());
@@ -2206,7 +2214,7 @@ TEST_F(CsvReaderTest, DtypesMapPartial)
 
   in_opts.set_dtypes({{"B", dtype<uint32_t>()}});
   {
-    auto result = cudf_io::read_csv(in_opts);
+    auto result = cudf::io::read_csv(in_opts);
 
     const auto view = result.tbl->view();
     ASSERT_EQ(type_id::STRING, view.column(0).type().id());
@@ -2216,12 +2224,12 @@ TEST_F(CsvReaderTest, DtypesMapPartial)
 
 TEST_F(CsvReaderTest, DtypesArrayInvalid)
 {
-  cudf_io::csv_reader_options in_opts =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{nullptr, 0})
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{nullptr, 0})
       .names({"A", "B", "C"})
       .dtypes(std::vector<cudf::data_type>{dtype<int16_t>(), dtype<int8_t>()});
 
-  EXPECT_THROW(cudf_io::read_csv(in_opts), cudf::logic_error);
+  EXPECT_THROW(cudf::io::read_csv(in_opts), cudf::logic_error);
 }
 
 TEST_F(CsvReaderTest, CsvDefaultOptionsWriteReadMatch)
@@ -2234,16 +2242,16 @@ TEST_F(CsvReaderTest, CsvDefaultOptionsWriteReadMatch)
   cudf::table_view input_table(std::vector<cudf::column_view>{int_column, str_column});
 
   // write that dataframe to a csv using default options to some temporary file
-  cudf_io::csv_writer_options writer_options =
-    cudf_io::csv_writer_options::builder(cudf_io::sink_info{filepath}, input_table);
-  cudf_io::write_csv(writer_options);
+  cudf::io::csv_writer_options writer_options =
+    cudf::io::csv_writer_options::builder(cudf::io::sink_info{filepath}, input_table);
+  cudf::io::write_csv(writer_options);
 
   // read the temp csv file using default options
-  cudf_io::csv_reader_options read_options =
-    cudf_io::csv_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::csv_reader_options read_options =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
       .dtypes(std::vector<data_type>{dtype<int32_t>(), dtype<cudf::string_view>()});
 
-  cudf_io::table_with_metadata new_table_and_metadata = cudf_io::read_csv(read_options);
+  cudf::io::table_with_metadata new_table_and_metadata = cudf::io::read_csv(read_options);
 
   // verify that the tables are identical, or as identical as expected.
   const auto new_table_view = new_table_and_metadata.tbl->view();
diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json_test.cpp
index 5a0db6e3c64..d7ab881861a 100644
--- a/cpp/tests/io/json_test.cpp
+++ b/cpp/tests/io/json_test.cpp
@@ -62,8 +62,6 @@ using column_wrapper =
                             cudf::test::strings_column_wrapper,
                             cudf::test::fixed_width_column_wrapper<T, SourceElementT>>::type;
 
-namespace cudf_io = cudf::io;
-
 cudf::test::TempDirTestEnvironment* const temp_env =
   static_cast<cudf::test::TempDirTestEnvironment*>(
     ::testing::AddGlobalTestEnvironment(new cudf::test::TempDirTestEnvironment));
@@ -239,12 +237,12 @@ TEST_P(JsonReaderParamTest, BasicJsonLines)
     {{{"0", "1"}, {"1", "1.1"}}, {{"0", "2"}, {"1", "2.2"}}, {{"0", "3"}, {"1", "3.3"}}}, "\n");
   std::string data = (test_opt == json_test_t::json_lines_row_orient) ? row_orient : record_orient;
 
-  cudf_io::json_reader_options in_options =
-    cudf_io::json_reader_options::builder(cudf_io::source_info{data.data(), data.size()})
+  cudf::io::json_reader_options in_options =
+    cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()})
       .dtypes(std::vector<data_type>{dtype<int32_t>(), dtype<double>()})
       .lines(true)
       .experimental(test_experimental);
-  cudf_io::table_with_metadata result = cudf_io::read_json(in_options);
+  cudf::io::table_with_metadata result = cudf::io::read_json(in_options);
 
   EXPECT_EQ(result.tbl->num_columns(), 2);
   EXPECT_EQ(result.tbl->num_rows(), 3);
@@ -286,13 +284,13 @@ TEST_P(JsonReaderParamTest, FloatingPoint)
     outfile << data;
   }
 
-  cudf_io::json_reader_options in_options =
-    cudf_io::json_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::json_reader_options in_options =
+    cudf::io::json_reader_options::builder(cudf::io::source_info{filepath})
       .dtypes({dtype<float>()})
       .lines(true)
       .experimental(test_experimental);
 
-  cudf_io::table_with_metadata result = cudf_io::read_json(in_options);
+  cudf::io::table_with_metadata result = cudf::io::read_json(in_options);
 
   EXPECT_EQ(result.tbl->num_columns(), 1);
   EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::FLOAT32);
@@ -318,13 +316,13 @@ TEST_P(JsonReaderParamTest, JsonLinesStrings)
                                                 "\n");
   std::string data = (test_opt == json_test_t::json_lines_row_orient) ? row_orient : record_orient;
 
-  cudf_io::json_reader_options in_options =
-    cudf_io::json_reader_options::builder(cudf_io::source_info{data.data(), data.size()})
+  cudf::io::json_reader_options in_options =
+    cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()})
       .dtypes({{"2", dtype<cudf::string_view>()}, {"0", dtype<int32_t>()}, {"1", dtype<double>()}})
       .lines(true)
       .experimental(test_experimental);
 
-  cudf_io::table_with_metadata result = cudf_io::read_json(in_options);
+  cudf::io::table_with_metadata result = cudf::io::read_json(in_options);
 
   EXPECT_EQ(result.tbl->num_columns(), 3);
   EXPECT_EQ(result.tbl->num_rows(), 2);
@@ -386,8 +384,8 @@ TEST_P(JsonReaderParamTest, MultiColumn)
     outfile << line.str();
   }
 
-  cudf_io::json_reader_options in_options =
-    cudf_io::json_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::json_reader_options in_options =
+    cudf::io::json_reader_options::builder(cudf::io::source_info{filepath})
       .dtypes({dtype<int8_t>(),
                dtype<int16_t>(),
                dtype<int32_t>(),
@@ -396,7 +394,7 @@ TEST_P(JsonReaderParamTest, MultiColumn)
                dtype<double>()})
       .lines(true)
       .experimental(test_experimental);
-  cudf_io::table_with_metadata result = cudf_io::read_json(in_options);
+  cudf::io::table_with_metadata result = cudf::io::read_json(in_options);
 
   auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; });
 
@@ -443,12 +441,12 @@ TEST_P(JsonReaderParamTest, Booleans)
     outfile << data;
   }
 
-  cudf_io::json_reader_options in_options =
-    cudf_io::json_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::json_reader_options in_options =
+    cudf::io::json_reader_options::builder(cudf::io::source_info{filepath})
       .dtypes({dtype<bool>()})
       .lines(true)
       .experimental(test_experimental);
-  cudf_io::table_with_metadata result = cudf_io::read_json(in_options);
+  cudf::io::table_with_metadata result = cudf::io::read_json(in_options);
 
   // Booleans are the same (integer) data type, but valued at 0 or 1
   const auto view = result.tbl->view();
@@ -488,13 +486,13 @@ TEST_P(JsonReaderParamTest, Dates)
     outfile << data;
   }
 
-  cudf_io::json_reader_options in_options =
-    cudf_io::json_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::json_reader_options in_options =
+    cudf::io::json_reader_options::builder(cudf::io::source_info{filepath})
       .dtypes({data_type{type_id::TIMESTAMP_MILLISECONDS}})
       .lines(true)
       .dayfirst(true)
       .experimental(test_experimental);
-  cudf_io::table_with_metadata result = cudf_io::read_json(in_options);
+  cudf::io::table_with_metadata result = cudf::io::read_json(in_options);
 
   const auto view = result.tbl->view();
   EXPECT_EQ(result.tbl->num_columns(), 1);
@@ -544,12 +542,12 @@ TEST_P(JsonReaderParamTest, Durations)
     outfile << data;
   }
 
-  cudf_io::json_reader_options in_options =
-    cudf_io::json_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::json_reader_options in_options =
+    cudf::io::json_reader_options::builder(cudf::io::source_info{filepath})
       .dtypes({data_type{type_id::DURATION_NANOSECONDS}})
       .lines(true)
       .experimental(test_experimental);
-  cudf_io::table_with_metadata result = cudf_io::read_json(in_options);
+  cudf::io::table_with_metadata result = cudf::io::read_json(in_options);
 
   const auto view = result.tbl->view();
   EXPECT_EQ(result.tbl->num_columns(), 1);
@@ -583,12 +581,12 @@ TEST_P(JsonReaderParamTest, JsonLinesDtypeInference)
                                                 "\n");
   std::string data = (test_opt == json_test_t::json_lines_row_orient) ? row_orient : record_orient;
 
-  cudf_io::json_reader_options in_options =
-    cudf_io::json_reader_options::builder(cudf_io::source_info{data.data(), data.size()})
+  cudf::io::json_reader_options in_options =
+    cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()})
       .lines(true)
       .experimental(test_experimental);
 
-  cudf_io::table_with_metadata result = cudf_io::read_json(in_options);
+  cudf::io::table_with_metadata result = cudf::io::read_json(in_options);
 
   EXPECT_EQ(result.tbl->num_columns(), 3);
   EXPECT_EQ(result.tbl->num_rows(), 2);
@@ -623,12 +621,12 @@ TEST_P(JsonReaderParamTest, JsonLinesFileInput)
   outfile << data;
   outfile.close();
 
-  cudf_io::json_reader_options in_options =
-    cudf_io::json_reader_options::builder(cudf_io::source_info{fname})
+  cudf::io::json_reader_options in_options =
+    cudf::io::json_reader_options::builder(cudf::io::source_info{fname})
       .lines(true)
       .experimental(test_experimental);
 
-  cudf_io::table_with_metadata result = cudf_io::read_json(in_options);
+  cudf::io::table_with_metadata result = cudf::io::read_json(in_options);
 
   EXPECT_EQ(result.tbl->num_columns(), 2);
   EXPECT_EQ(result.tbl->num_rows(), 2);
@@ -652,13 +650,13 @@ TEST_F(JsonReaderTest, JsonLinesByteRange)
   outfile << "[1000]\n[2000]\n[3000]\n[4000]\n[5000]\n[6000]\n[7000]\n[8000]\n[9000]\n";
   outfile.close();
 
-  cudf_io::json_reader_options in_options =
-    cudf_io::json_reader_options::builder(cudf_io::source_info{fname})
+  cudf::io::json_reader_options in_options =
+    cudf::io::json_reader_options::builder(cudf::io::source_info{fname})
       .lines(true)
       .byte_range_offset(11)
       .byte_range_size(20);
 
-  cudf_io::table_with_metadata result = cudf_io::read_json(in_options);
+  cudf::io::table_with_metadata result = cudf::io::read_json(in_options);
 
   EXPECT_EQ(result.tbl->num_columns(), 1);
   EXPECT_EQ(result.tbl->num_rows(), 3);
@@ -681,12 +679,12 @@ TEST_P(JsonReaderDualTest, JsonLinesObjects)
   outfile << " {\"co\\\"l1\" : 1, \"col2\" : 2.0} \n";
   outfile.close();
 
-  cudf_io::json_reader_options in_options =
-    cudf_io::json_reader_options::builder(cudf_io::source_info{fname})
+  cudf::io::json_reader_options in_options =
+    cudf::io::json_reader_options::builder(cudf::io::source_info{fname})
       .lines(true)
       .experimental(test_experimental);
 
-  cudf_io::table_with_metadata result = cudf_io::read_json(in_options);
+  cudf::io::table_with_metadata result = cudf::io::read_json(in_options);
 
   EXPECT_EQ(result.tbl->num_columns(), 2);
   EXPECT_EQ(result.tbl->num_rows(), 1);
@@ -707,12 +705,12 @@ TEST_P(JsonReaderDualTest, JsonLinesObjectsStrings)
   auto const test_opt          = GetParam();
   bool const test_experimental = (test_opt == json_test_t::json_experimental_record_orient);
   auto test_json_objects       = [test_experimental](std::string const& data) {
-    cudf_io::json_reader_options in_options =
-      cudf_io::json_reader_options::builder(cudf_io::source_info{data.data(), data.size()})
+    cudf::io::json_reader_options in_options =
+      cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()})
         .lines(true)
         .experimental(test_experimental);
 
-    cudf_io::table_with_metadata result = cudf_io::read_json(in_options);
+    cudf::io::table_with_metadata result = cudf::io::read_json(in_options);
 
     EXPECT_EQ(result.tbl->num_columns(), 3);
     EXPECT_EQ(result.tbl->num_rows(), 2);
@@ -751,12 +749,12 @@ TEST_P(JsonReaderDualTest, JsonLinesObjectsMissingData)
   std::string const data =
     "{              \"col2\":1.1, \"col3\":\"aaa\"}\n"
     "{\"col1\":200,               \"col3\":\"bbb\"}\n";
-  cudf_io::json_reader_options in_options =
-    cudf_io::json_reader_options::builder(cudf_io::source_info{data.data(), data.size()})
+  cudf::io::json_reader_options in_options =
+    cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()})
       .lines(true)
       .experimental(test_experimental);
 
-  cudf_io::table_with_metadata result = cudf_io::read_json(in_options);
+  cudf::io::table_with_metadata result = cudf::io::read_json(in_options);
 
   EXPECT_EQ(result.tbl->num_columns(), 3);
   EXPECT_EQ(result.tbl->num_rows(), 2);
@@ -790,12 +788,12 @@ TEST_P(JsonReaderDualTest, JsonLinesObjectsOutOfOrder)
     "{\"col1\":100, \"col2\":1.1, \"col3\":\"aaa\"}\n"
     "{\"col3\":\"bbb\", \"col1\":200, \"col2\":2.2}\n";
 
-  cudf_io::json_reader_options in_options =
-    cudf_io::json_reader_options::builder(cudf_io::source_info{data.data(), data.size()})
+  cudf::io::json_reader_options in_options =
+    cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()})
       .lines(true)
       .experimental(test_experimental);
 
-  cudf_io::table_with_metadata result = cudf_io::read_json(in_options);
+  cudf::io::table_with_metadata result = cudf::io::read_json(in_options);
 
   EXPECT_EQ(result.tbl->num_columns(), 3);
   EXPECT_EQ(result.tbl->num_rows(), 2);
@@ -825,9 +823,9 @@ TEST_F(JsonReaderTest, EmptyFile)
     outfile << "";
   }
 
-  cudf_io::json_reader_options in_options =
-    cudf_io::json_reader_options::builder(cudf_io::source_info{filepath}).lines(true);
-  auto result = cudf_io::read_json(in_options);
+  cudf::io::json_reader_options in_options =
+    cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}).lines(true);
+  auto result = cudf::io::read_json(in_options);
 
   const auto view = result.tbl->view();
   EXPECT_EQ(0, view.num_columns());
@@ -842,9 +840,9 @@ TEST_F(JsonReaderTest, NoDataFile)
     outfile << "{}\n";
   }
 
-  cudf_io::json_reader_options in_options =
-    cudf_io::json_reader_options::builder(cudf_io::source_info{filepath}).lines(true);
-  cudf_io::table_with_metadata result = cudf_io::read_json(in_options);
+  cudf::io::json_reader_options in_options =
+    cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}).lines(true);
+  cudf::io::table_with_metadata result = cudf::io::read_json(in_options);
 
   const auto view = result.tbl->view();
   EXPECT_EQ(0, view.num_columns());
@@ -862,13 +860,13 @@ TEST_F(JsonReaderTest, ArrowFileSource)
   std::shared_ptr<arrow::io::ReadableFile> infile;
   ASSERT_TRUE(arrow::io::ReadableFile::Open(fname).Value(&infile).ok());
 
-  auto arrow_source = cudf_io::arrow_io_source{infile};
-  cudf_io::json_reader_options in_options =
-    cudf_io::json_reader_options::builder(cudf_io::source_info{&arrow_source})
+  auto arrow_source = cudf::io::arrow_io_source{infile};
+  cudf::io::json_reader_options in_options =
+    cudf::io::json_reader_options::builder(cudf::io::source_info{&arrow_source})
       .dtypes({dtype<int8_t>()})
       .lines(true);
   ;
-  cudf_io::table_with_metadata result = cudf_io::read_json(in_options);
+  cudf::io::table_with_metadata result = cudf::io::read_json(in_options);
 
   EXPECT_EQ(result.tbl->num_columns(), 1);
   EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::INT8);
@@ -899,12 +897,12 @@ TEST_P(JsonReaderParamTest, InvalidFloatingPoint)
     outfile << data;
   }
 
-  cudf_io::json_reader_options in_options =
-    cudf_io::json_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::json_reader_options in_options =
+    cudf::io::json_reader_options::builder(cudf::io::source_info{filepath})
       .dtypes({dtype<float>()})
       .lines(true)
       .experimental(test_experimental);
-  cudf_io::table_with_metadata result = cudf_io::read_json(in_options);
+  cudf::io::table_with_metadata result = cudf::io::read_json(in_options);
 
   EXPECT_EQ(result.tbl->num_columns(), 1);
   EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::FLOAT32);
@@ -925,11 +923,11 @@ TEST_P(JsonReaderParamTest, StringInference)
   std::string record_orient    = to_records_orient({{{"0", R"("-1")"}}}, "\n");
   std::string data = (test_opt == json_test_t::json_lines_row_orient) ? row_orient : record_orient;
 
-  cudf_io::json_reader_options in_options =
-    cudf_io::json_reader_options::builder(cudf_io::source_info{data.c_str(), data.size()})
+  cudf::io::json_reader_options in_options =
+    cudf::io::json_reader_options::builder(cudf::io::source_info{data.c_str(), data.size()})
       .lines(true)
       .experimental(test_experimental);
-  cudf_io::table_with_metadata result = cudf_io::read_json(in_options);
+  cudf::io::table_with_metadata result = cudf::io::read_json(in_options);
 
   EXPECT_EQ(result.tbl->num_columns(), 1);
   EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::STRING);
@@ -1009,12 +1007,12 @@ TEST_P(JsonReaderParamTest, ParseInRangeIntegers)
     std::ofstream outfile(filepath, std::ofstream::out);
     outfile << line.str();
   }
-  cudf_io::json_reader_options in_options =
-    cudf_io::json_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::json_reader_options in_options =
+    cudf::io::json_reader_options::builder(cudf::io::source_info{filepath})
       .lines(true)
       .experimental(test_experimental);
 
-  cudf_io::table_with_metadata result = cudf_io::read_json(in_options);
+  cudf::io::table_with_metadata result = cudf::io::read_json(in_options);
 
   const auto view = result.tbl->view();
 
@@ -1114,12 +1112,12 @@ TEST_P(JsonReaderParamTest, ParseOutOfRangeIntegers)
     std::ofstream outfile(filepath, std::ofstream::out);
     outfile << line.str();
   }
-  cudf_io::json_reader_options in_options =
-    cudf_io::json_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::json_reader_options in_options =
+    cudf::io::json_reader_options::builder(cudf::io::source_info{filepath})
       .lines(true)
       .experimental(test_experimental);
 
-  cudf_io::table_with_metadata result = cudf_io::read_json(in_options);
+  cudf::io::table_with_metadata result = cudf::io::read_json(in_options);
 
   const auto view = result.tbl->view();
 
@@ -1155,12 +1153,12 @@ TEST_P(JsonReaderParamTest, JsonLinesMultipleFileInputs)
   outfile2 << data[1];
   outfile2.close();
 
-  cudf_io::json_reader_options in_options =
-    cudf_io::json_reader_options::builder(cudf_io::source_info{{file1, file2}})
+  cudf::io::json_reader_options in_options =
+    cudf::io::json_reader_options::builder(cudf::io::source_info{{file1, file2}})
       .lines(true)
       .experimental(test_experimental);
 
-  cudf_io::table_with_metadata result = cudf_io::read_json(in_options);
+  cudf::io::table_with_metadata result = cudf::io::read_json(in_options);
 
   EXPECT_EQ(result.tbl->num_columns(), 2);
   EXPECT_EQ(result.tbl->num_rows(), 4);
@@ -1183,23 +1181,23 @@ TEST_F(JsonReaderTest, BadDtypeParams)
 {
   std::string buffer = "[1,2,3,4]";
 
-  cudf_io::json_reader_options options_vec =
-    cudf_io::json_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()})
+  cudf::io::json_reader_options options_vec =
+    cudf::io::json_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()})
       .lines(true)
       .dtypes({dtype<int8_t>()});
 
   // should throw because there are four columns and only one dtype
-  EXPECT_THROW(cudf_io::read_json(options_vec), cudf::logic_error);
+  EXPECT_THROW(cudf::io::read_json(options_vec), cudf::logic_error);
 
-  cudf_io::json_reader_options options_map =
-    cudf_io::json_reader_options::builder(cudf_io::source_info{buffer.c_str(), buffer.size()})
+  cudf::io::json_reader_options options_map =
+    cudf::io::json_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()})
       .lines(true)
       .dtypes(std::map<std::string, cudf::data_type>{{"0", dtype<int8_t>()},
                                                      {"1", dtype<int8_t>()},
                                                      {"2", dtype<int8_t>()},
                                                      {"wrong_name", dtype<int8_t>()}});
   // should throw because one of the columns is not in the dtype map
-  EXPECT_THROW(cudf_io::read_json(options_map), cudf::logic_error);
+  EXPECT_THROW(cudf::io::read_json(options_map), cudf::logic_error);
 }
 
 TEST_F(JsonReaderTest, JsonExperimentalBasic)
@@ -1209,9 +1207,9 @@ TEST_F(JsonReaderTest, JsonExperimentalBasic)
   outfile << R"([{"a":"11", "b":"1.1"},{"a":"22", "b":"2.2"}])";
   outfile.close();
 
-  cudf_io::json_reader_options options =
-    cudf_io::json_reader_options::builder(cudf_io::source_info{fname}).experimental(true);
-  auto result = cudf_io::read_json(options);
+  cudf::io::json_reader_options options =
+    cudf::io::json_reader_options::builder(cudf::io::source_info{fname}).experimental(true);
+  auto result = cudf::io::read_json(options);
 
   EXPECT_EQ(result.tbl->num_columns(), 2);
   EXPECT_EQ(result.tbl->num_rows(), 2);
@@ -1366,15 +1364,15 @@ TEST_P(JsonReaderParamTest, JsonDtypeSchema)
 
   std::string data = (test_opt == json_test_t::json_lines_row_orient) ? row_orient : record_orient;
 
-  std::map<std::string, cudf_io::schema_element> dtype_schema{
+  std::map<std::string, cudf::io::schema_element> dtype_schema{
     {"2", {dtype<cudf::string_view>()}}, {"0", {dtype<int32_t>()}}, {"1", {dtype<double>()}}};
-  cudf_io::json_reader_options in_options =
-    cudf_io::json_reader_options::builder(cudf_io::source_info{data.data(), data.size()})
+  cudf::io::json_reader_options in_options =
+    cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()})
       .dtypes(dtype_schema)
       .lines(true)
       .experimental(test_experimental);
 
-  cudf_io::table_with_metadata result = cudf_io::read_json(in_options);
+  cudf::io::table_with_metadata result = cudf::io::read_json(in_options);
 
   EXPECT_EQ(result.tbl->num_columns(), 3);
   EXPECT_EQ(result.tbl->num_rows(), 2);
@@ -1399,7 +1397,7 @@ TEST_F(JsonReaderTest, JsonNestedDtypeSchema)
 {
   std::string json_string = R"( [{"a":[123, {"0": 123}], "b":1.0}, {"b":1.1}, {"b":2.1}])";
 
-  std::map<std::string, cudf_io::schema_element> dtype_schema{
+  std::map<std::string, cudf::io::schema_element> dtype_schema{
     {"a",
      {
        data_type{cudf::type_id::LIST},
@@ -1408,14 +1406,14 @@ TEST_F(JsonReaderTest, JsonNestedDtypeSchema)
     {"b", {dtype<int32_t>()}},
   };
 
-  cudf_io::json_reader_options in_options =
-    cudf_io::json_reader_options::builder(
-      cudf_io::source_info{json_string.data(), json_string.size()})
+  cudf::io::json_reader_options in_options =
+    cudf::io::json_reader_options::builder(
+      cudf::io::source_info{json_string.data(), json_string.size()})
       .dtypes(dtype_schema)
       .lines(false)
       .experimental(true);
 
-  cudf_io::table_with_metadata result = cudf_io::read_json(in_options);
+  cudf::io::table_with_metadata result = cudf::io::read_json(in_options);
 
   // Make sure we have columns "a" and "b"
   ASSERT_EQ(result.tbl->num_columns(), 2);
diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp
index a658ed0a55d..2f761eeac66 100644
--- a/cpp/tests/io/orc_test.cpp
+++ b/cpp/tests/io/orc_test.cpp
@@ -42,8 +42,6 @@
 #define ZSTD_SUPPORTED 0
 #endif
 
-namespace cudf_io = cudf::io;
-
 template <typename T, typename SourceElementT = T>
 using column_wrapper =
   typename std::conditional<std::is_same_v<T, cudf::string_view>,
@@ -182,9 +180,9 @@ struct SkipRowTest {
       sequence, sequence + file_num_rows);
     table_view input_table({input_col});
 
-    cudf_io::orc_writer_options out_opts =
-      cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, input_table);
-    cudf_io::write_orc(out_opts);
+    cudf::io::orc_writer_options out_opts =
+      cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, input_table);
+    cudf::io::write_orc(out_opts);
 
     auto begin_sequence = sequence, end_sequence = sequence;
     if (skip_rows < file_num_rows) {
@@ -203,12 +201,12 @@ struct SkipRowTest {
     auto filepath =
       temp_env->get_temp_filepath("SkipRowTest" + std::to_string(test_calls++) + ".orc");
     auto expected_result = get_expected_result(filepath, skip_rows, file_num_rows, read_num_rows);
-    cudf_io::orc_reader_options in_opts =
-      cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath})
+    cudf::io::orc_reader_options in_opts =
+      cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath})
         .use_index(false)
         .skip_rows(skip_rows)
         .num_rows(read_num_rows);
-    auto result = cudf_io::read_orc(in_opts);
+    auto result = cudf::io::read_orc(in_opts);
     CUDF_TEST_EXPECT_TABLES_EQUAL(expected_result->view(), result.tbl->view());
   }
 
@@ -218,11 +216,11 @@ struct SkipRowTest {
       temp_env->get_temp_filepath("SkipRowTest" + std::to_string(test_calls++) + ".orc");
     auto expected_result =
       get_expected_result(filepath, skip_rows, file_num_rows, file_num_rows - skip_rows);
-    cudf_io::orc_reader_options in_opts =
-      cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath})
+    cudf::io::orc_reader_options in_opts =
+      cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath})
         .use_index(false)
         .skip_rows(skip_rows);
-    auto result = cudf_io::read_orc(in_opts);
+    auto result = cudf::io::read_orc(in_opts);
     CUDF_TEST_EXPECT_TABLES_EQUAL(expected_result->view(), result.tbl->view());
   }
 };
@@ -239,13 +237,13 @@ TYPED_TEST(OrcWriterNumericTypeTest, SingleColumn)
   table_view expected({col});
 
   auto filepath = temp_env->get_temp_filepath("OrcSingleColumn.orc");
-  cudf_io::orc_writer_options out_opts =
-    cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected);
-  cudf_io::write_orc(out_opts);
+  cudf::io::orc_writer_options out_opts =
+    cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected);
+  cudf::io::write_orc(out_opts);
 
-  cudf_io::orc_reader_options in_opts =
-    cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}).use_index(false);
-  auto result = cudf_io::read_orc(in_opts);
+  cudf::io::orc_reader_options in_opts =
+    cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false);
+  auto result = cudf::io::read_orc(in_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view());
 }
@@ -261,13 +259,13 @@ TYPED_TEST(OrcWriterNumericTypeTest, SingleColumnWithNulls)
   table_view expected({col});
 
   auto filepath = temp_env->get_temp_filepath("OrcSingleColumnWithNulls.orc");
-  cudf_io::orc_writer_options out_opts =
-    cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected);
-  cudf_io::write_orc(out_opts);
+  cudf::io::orc_writer_options out_opts =
+    cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected);
+  cudf::io::write_orc(out_opts);
 
-  cudf_io::orc_reader_options in_opts =
-    cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}).use_index(false);
-  auto result = cudf_io::read_orc(in_opts);
+  cudf::io::orc_reader_options in_opts =
+    cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false);
+  auto result = cudf::io::read_orc(in_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view());
 }
@@ -283,15 +281,15 @@ TYPED_TEST(OrcWriterTimestampTypeTest, Timestamps)
   table_view expected({col});
 
   auto filepath = temp_env->get_temp_filepath("OrcTimestamps.orc");
-  cudf_io::orc_writer_options out_opts =
-    cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected);
-  cudf_io::write_orc(out_opts);
+  cudf::io::orc_writer_options out_opts =
+    cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected);
+  cudf::io::write_orc(out_opts);
 
-  cudf_io::orc_reader_options in_opts =
-    cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::orc_reader_options in_opts =
+    cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath})
       .use_index(false)
       .timestamp_type(this->type());
-  auto result = cudf_io::read_orc(in_opts);
+  auto result = cudf::io::read_orc(in_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view());
 }
@@ -309,15 +307,15 @@ TYPED_TEST(OrcWriterTimestampTypeTest, TimestampsWithNulls)
   table_view expected({col});
 
   auto filepath = temp_env->get_temp_filepath("OrcTimestampsWithNulls.orc");
-  cudf_io::orc_writer_options out_opts =
-    cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected);
-  cudf_io::write_orc(out_opts);
+  cudf::io::orc_writer_options out_opts =
+    cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected);
+  cudf::io::write_orc(out_opts);
 
-  cudf_io::orc_reader_options in_opts =
-    cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::orc_reader_options in_opts =
+    cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath})
       .use_index(false)
       .timestamp_type(this->type());
-  auto result = cudf_io::read_orc(in_opts);
+  auto result = cudf::io::read_orc(in_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view());
 }
@@ -333,15 +331,15 @@ TYPED_TEST(OrcWriterTimestampTypeTest, TimestampOverflow)
   table_view expected({col});
 
   auto filepath = temp_env->get_temp_filepath("OrcTimestampOverflow.orc");
-  cudf_io::orc_writer_options out_opts =
-    cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected);
-  cudf_io::write_orc(out_opts);
+  cudf::io::orc_writer_options out_opts =
+    cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected);
+  cudf::io::write_orc(out_opts);
 
-  cudf_io::orc_reader_options in_opts =
-    cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::orc_reader_options in_opts =
+    cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath})
       .use_index(false)
       .timestamp_type(this->type());
-  auto result = cudf_io::read_orc(in_opts);
+  auto result = cudf::io::read_orc(in_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view());
 }
@@ -381,7 +379,7 @@ TEST_F(OrcWriterTest, MultiColumn)
 
   table_view expected({col0, col1, col2, col3, col4, col5, col6, col7, col8, col9});
 
-  cudf_io::table_input_metadata expected_metadata(expected);
+  cudf::io::table_input_metadata expected_metadata(expected);
   expected_metadata.column_metadata[0].set_name("bools");
   expected_metadata.column_metadata[1].set_name("int8s");
   expected_metadata.column_metadata[2].set_name("int16s");
@@ -394,14 +392,14 @@ TEST_F(OrcWriterTest, MultiColumn)
   expected_metadata.column_metadata[9].set_name("structs");
 
   auto filepath = temp_env->get_temp_filepath("OrcMultiColumn.orc");
-  cudf_io::orc_writer_options out_opts =
-    cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected)
+  cudf::io::orc_writer_options out_opts =
+    cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected)
       .metadata(&expected_metadata);
-  cudf_io::write_orc(out_opts);
+  cudf::io::write_orc(out_opts);
 
-  cudf_io::orc_reader_options in_opts =
-    cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}).use_index(false);
-  auto result = cudf_io::read_orc(in_opts);
+  cudf::io::orc_reader_options in_opts =
+    cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false);
+  auto result = cudf::io::read_orc(in_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view());
   cudf::test::expect_metadata_equal(expected_metadata, result.metadata);
@@ -449,7 +447,7 @@ TEST_F(OrcWriterTest, MultiColumnWithNulls)
   struct_col col8{{ages_col}, {0, 1, 1, 0, 1, 1, 0, 1, 1, 0}};
   table_view expected({col0, col1, col2, col3, col4, col5, col6, col7, col8});
 
-  cudf_io::table_input_metadata expected_metadata(expected);
+  cudf::io::table_input_metadata expected_metadata(expected);
   expected_metadata.column_metadata[0].set_name("bools");
   expected_metadata.column_metadata[1].set_name("int8s");
   expected_metadata.column_metadata[2].set_name("int16s");
@@ -461,14 +459,14 @@ TEST_F(OrcWriterTest, MultiColumnWithNulls)
   expected_metadata.column_metadata[8].set_name("structs");
 
   auto filepath = temp_env->get_temp_filepath("OrcMultiColumnWithNulls.orc");
-  cudf_io::orc_writer_options out_opts =
-    cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected)
+  cudf::io::orc_writer_options out_opts =
+    cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected)
       .metadata(&expected_metadata);
-  cudf_io::write_orc(out_opts);
+  cudf::io::write_orc(out_opts);
 
-  cudf_io::orc_reader_options in_opts =
-    cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}).use_index(false);
-  auto result = cudf_io::read_orc(in_opts);
+  cudf::io::orc_reader_options in_opts =
+    cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false);
+  auto result = cudf::io::read_orc(in_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view());
   cudf::test::expect_metadata_equal(expected_metadata, result.metadata);
@@ -484,15 +482,15 @@ TEST_F(OrcWriterTest, ReadZeroRows)
   table_view expected({col});
 
   auto filepath = temp_env->get_temp_filepath("OrcSingleColumn.orc");
-  cudf_io::orc_writer_options out_opts =
-    cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected);
-  cudf_io::write_orc(out_opts);
+  cudf::io::orc_writer_options out_opts =
+    cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected);
+  cudf::io::write_orc(out_opts);
 
-  cudf_io::orc_reader_options in_opts =
-    cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::orc_reader_options in_opts =
+    cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath})
       .use_index(false)
       .num_rows(0);
-  auto result = cudf_io::read_orc(in_opts);
+  auto result = cudf::io::read_orc(in_opts);
 
   EXPECT_EQ(0, result.tbl->num_rows());
   EXPECT_EQ(1, result.tbl->num_columns());
@@ -513,20 +511,20 @@ TEST_F(OrcWriterTest, Strings)
 
   table_view expected({col0, col1, col2});
 
-  cudf_io::table_input_metadata expected_metadata(expected);
+  cudf::io::table_input_metadata expected_metadata(expected);
   expected_metadata.column_metadata[0].set_name("col_other");
   expected_metadata.column_metadata[1].set_name("col_string");
   expected_metadata.column_metadata[2].set_name("col_another");
 
   auto filepath = temp_env->get_temp_filepath("OrcStrings.orc");
-  cudf_io::orc_writer_options out_opts =
-    cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected)
+  cudf::io::orc_writer_options out_opts =
+    cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected)
       .metadata(&expected_metadata);
-  cudf_io::write_orc(out_opts);
+  cudf::io::write_orc(out_opts);
 
-  cudf_io::orc_reader_options in_opts =
-    cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}).use_index(false);
-  auto result = cudf_io::read_orc(in_opts);
+  cudf::io::orc_reader_options in_opts =
+    cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false);
+  auto result = cudf::io::read_orc(in_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view());
   cudf::test::expect_metadata_equal(expected_metadata, result.metadata);
@@ -560,7 +558,7 @@ TEST_F(OrcWriterTest, SlicedTable)
 
   table_view expected({col0, col1, col2, col3, col4, col5});
 
-  cudf_io::table_input_metadata expected_metadata(expected);
+  cudf::io::table_input_metadata expected_metadata(expected);
   expected_metadata.column_metadata[0].set_name("col_other");
   expected_metadata.column_metadata[1].set_name("col_string");
   expected_metadata.column_metadata[2].set_name("col_another");
@@ -571,14 +569,14 @@ TEST_F(OrcWriterTest, SlicedTable)
   auto expected_slice = cudf::slice(expected, {2, static_cast<cudf::size_type>(num_rows)});
 
   auto filepath = temp_env->get_temp_filepath("SlicedTable.orc");
-  cudf_io::orc_writer_options out_opts =
-    cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected_slice)
+  cudf::io::orc_writer_options out_opts =
+    cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected_slice)
       .metadata(&expected_metadata);
-  cudf_io::write_orc(out_opts);
+  cudf::io::write_orc(out_opts);
 
-  cudf_io::orc_reader_options in_opts =
-    cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_orc(in_opts);
+  cudf::io::orc_reader_options in_opts =
+    cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_orc(in_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected_slice, result.tbl->view());
   cudf::test::expect_metadata_equal(expected_metadata, result.metadata);
@@ -592,19 +590,20 @@ TEST_F(OrcWriterTest, HostBuffer)
 
   table_view expected{{col}};
 
-  cudf_io::table_input_metadata expected_metadata(expected);
+  cudf::io::table_input_metadata expected_metadata(expected);
   expected_metadata.column_metadata[0].set_name("col_other");
 
   std::vector<char> out_buffer;
-  cudf_io::orc_writer_options out_opts =
-    cudf_io::orc_writer_options::builder(cudf_io::sink_info(&out_buffer), expected)
+  cudf::io::orc_writer_options out_opts =
+    cudf::io::orc_writer_options::builder(cudf::io::sink_info(&out_buffer), expected)
       .metadata(&expected_metadata);
-  cudf_io::write_orc(out_opts);
+  cudf::io::write_orc(out_opts);
 
-  cudf_io::orc_reader_options in_opts =
-    cudf_io::orc_reader_options::builder(cudf_io::source_info(out_buffer.data(), out_buffer.size()))
+  cudf::io::orc_reader_options in_opts =
+    cudf::io::orc_reader_options::builder(
+      cudf::io::source_info(out_buffer.data(), out_buffer.size()))
       .use_index(false);
-  const auto result = cudf_io::read_orc(in_opts);
+  const auto result = cudf::io::read_orc(in_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view());
   cudf::test::expect_metadata_equal(expected_metadata, result.metadata);
@@ -625,14 +624,14 @@ TEST_F(OrcWriterTest, negTimestampsNano)
   table_view expected({timestamps_ns});
 
   auto filepath = temp_env->get_temp_filepath("OrcNegTimestamp.orc");
-  cudf_io::orc_writer_options out_opts =
-    cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected);
+  cudf::io::orc_writer_options out_opts =
+    cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected);
 
-  cudf_io::write_orc(out_opts);
+  cudf::io::write_orc(out_opts);
 
-  cudf_io::orc_reader_options in_opts =
-    cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}).use_index(false);
-  auto result = cudf_io::read_orc(in_opts);
+  cudf::io::orc_reader_options in_opts =
+    cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false);
+  auto result = cudf::io::read_orc(in_opts);
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(
     expected.column(0), result.tbl->view().column(0), cudf::test::debug_output_level::ALL_ERRORS);
@@ -647,13 +646,13 @@ TEST_F(OrcWriterTest, Slice)
   cudf::table_view tbl{result};
 
   auto filepath = temp_env->get_temp_filepath("Slice.orc");
-  cudf_io::orc_writer_options out_opts =
-    cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, tbl);
-  cudf_io::write_orc(out_opts);
+  cudf::io::orc_writer_options out_opts =
+    cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, tbl);
+  cudf::io::write_orc(out_opts);
 
-  cudf_io::orc_reader_options in_opts =
-    cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath});
-  auto read_table = cudf_io::read_orc(in_opts);
+  cudf::io::orc_reader_options in_opts =
+    cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath});
+  auto read_table = cudf::io::read_orc(in_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUIVALENT(read_table.tbl->view(), tbl);
 }
@@ -664,13 +663,13 @@ TEST_F(OrcChunkedWriterTest, SingleTable)
   auto table1 = create_random_fixed_table<int>(5, 5, true);
 
   auto filepath = temp_env->get_temp_filepath("ChunkedSingle.orc");
-  cudf_io::chunked_orc_writer_options opts =
-    cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath});
-  cudf_io::orc_chunked_writer(opts).write(*table1);
+  cudf::io::chunked_orc_writer_options opts =
+    cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath});
+  cudf::io::orc_chunked_writer(opts).write(*table1);
 
-  cudf_io::orc_reader_options read_opts =
-    cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_orc(read_opts);
+  cudf::io::orc_reader_options read_opts =
+    cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_orc(read_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *table1);
 }
@@ -684,13 +683,13 @@ TEST_F(OrcChunkedWriterTest, SimpleTable)
   auto full_table = cudf::concatenate(std::vector<table_view>({*table1, *table2}));
 
   auto filepath = temp_env->get_temp_filepath("ChunkedSimple.orc");
-  cudf_io::chunked_orc_writer_options opts =
-    cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath});
-  cudf_io::orc_chunked_writer(opts).write(*table1).write(*table2);
+  cudf::io::chunked_orc_writer_options opts =
+    cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath});
+  cudf::io::orc_chunked_writer(opts).write(*table1).write(*table2);
 
-  cudf_io::orc_reader_options read_opts =
-    cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_orc(read_opts);
+  cudf::io::orc_reader_options read_opts =
+    cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_orc(read_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table);
 }
@@ -704,13 +703,13 @@ TEST_F(OrcChunkedWriterTest, LargeTables)
   auto full_table = cudf::concatenate(std::vector<table_view>({*table1, *table2}));
 
   auto filepath = temp_env->get_temp_filepath("ChunkedLarge.orc");
-  cudf_io::chunked_orc_writer_options opts =
-    cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath});
-  cudf_io::orc_chunked_writer(opts).write(*table1).write(*table2);
+  cudf::io::chunked_orc_writer_options opts =
+    cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath});
+  cudf::io::orc_chunked_writer(opts).write(*table1).write(*table2);
 
-  cudf_io::orc_reader_options read_opts =
-    cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_orc(read_opts);
+  cudf::io::orc_reader_options read_opts =
+    cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_orc(read_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table);
 }
@@ -730,17 +729,17 @@ TEST_F(OrcChunkedWriterTest, ManyTables)
   auto expected = cudf::concatenate(table_views);
 
   auto filepath = temp_env->get_temp_filepath("ChunkedManyTables.orc");
-  cudf_io::chunked_orc_writer_options opts =
-    cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath});
-  cudf_io::orc_chunked_writer writer(opts);
+  cudf::io::chunked_orc_writer_options opts =
+    cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath});
+  cudf::io::orc_chunked_writer writer(opts);
   std::for_each(table_views.begin(), table_views.end(), [&writer](table_view const& tbl) {
     writer.write(tbl);
   });
   writer.close();
 
-  cudf_io::orc_reader_options read_opts =
-    cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_orc(read_opts);
+  cudf::io::orc_reader_options read_opts =
+    cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_orc(read_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected);
 }
@@ -760,20 +759,20 @@ TEST_F(OrcChunkedWriterTest, Metadata)
 
   table_view expected({col0, col1, col2});
 
-  cudf_io::table_input_metadata expected_metadata(expected);
+  cudf::io::table_input_metadata expected_metadata(expected);
   expected_metadata.column_metadata[0].set_name("col_other");
   expected_metadata.column_metadata[1].set_name("col_string");
   expected_metadata.column_metadata[2].set_name("col_another");
 
   auto filepath = temp_env->get_temp_filepath("ChunkedMetadata.orc");
-  cudf_io::chunked_orc_writer_options opts =
-    cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath})
+  cudf::io::chunked_orc_writer_options opts =
+    cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath})
       .metadata(&expected_metadata);
-  cudf_io::orc_chunked_writer(opts).write(expected).write(expected);
+  cudf::io::orc_chunked_writer(opts).write(expected).write(expected);
 
-  cudf_io::orc_reader_options read_opts =
-    cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_orc(read_opts);
+  cudf::io::orc_reader_options read_opts =
+    cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_orc(read_opts);
 
   cudf::test::expect_metadata_equal(expected_metadata, result.metadata);
 }
@@ -793,13 +792,13 @@ TEST_F(OrcChunkedWriterTest, Strings)
   auto expected = cudf::concatenate(std::vector<table_view>({tbl1, tbl2}));
 
   auto filepath = temp_env->get_temp_filepath("ChunkedStrings.orc");
-  cudf_io::chunked_orc_writer_options opts =
-    cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath});
-  cudf_io::orc_chunked_writer(opts).write(tbl1).write(tbl2);
+  cudf::io::chunked_orc_writer_options opts =
+    cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath});
+  cudf::io::orc_chunked_writer(opts).write(tbl1).write(tbl2);
 
-  cudf_io::orc_reader_options read_opts =
-    cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_orc(read_opts);
+  cudf::io::orc_reader_options read_opts =
+    cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_orc(read_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected);
 }
@@ -811,9 +810,9 @@ TEST_F(OrcChunkedWriterTest, MismatchedTypes)
   auto table2 = create_random_fixed_table<float>(4, 4, true);
 
   auto filepath = temp_env->get_temp_filepath("ChunkedMismatchedTypes.orc");
-  cudf_io::chunked_orc_writer_options opts =
-    cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath});
-  cudf_io::orc_chunked_writer writer(opts);
+  cudf::io::chunked_orc_writer_options opts =
+    cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath});
+  cudf::io::orc_chunked_writer writer(opts);
   writer.write(*table1);
   EXPECT_THROW(writer.write(*table2), cudf::logic_error);
 }
@@ -824,9 +823,9 @@ TEST_F(OrcChunkedWriterTest, ChunkedWritingAfterClosing)
   auto table1 = create_random_fixed_table<int>(4, 4, true);
 
   auto filepath = temp_env->get_temp_filepath("ChunkedWritingAfterClosing.orc");
-  cudf_io::chunked_orc_writer_options opts =
-    cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath});
-  cudf_io::orc_chunked_writer writer(opts);
+  cudf::io::chunked_orc_writer_options opts =
+    cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath});
+  cudf::io::orc_chunked_writer writer(opts);
   writer.write(*table1);
   writer.close();
   EXPECT_THROW(writer.write(*table1), cudf::logic_error);
@@ -839,9 +838,9 @@ TEST_F(OrcChunkedWriterTest, MismatchedStructure)
   auto table2 = create_random_fixed_table<int>(3, 4, true);
 
   auto filepath = temp_env->get_temp_filepath("ChunkedMismatchedStructure.orc");
-  cudf_io::chunked_orc_writer_options opts =
-    cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath});
-  cudf_io::orc_chunked_writer writer(opts);
+  cudf::io::chunked_orc_writer_options opts =
+    cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath});
+  cudf::io::orc_chunked_writer writer(opts);
   writer.write(*table1);
   EXPECT_THROW(writer.write(*table2), cudf::logic_error);
 }
@@ -855,13 +854,13 @@ TEST_F(OrcChunkedWriterTest, ReadStripes)
   auto full_table = cudf::concatenate(std::vector<table_view>({*table2, *table1, *table2}));
 
   auto filepath = temp_env->get_temp_filepath("ChunkedStripes.orc");
-  cudf_io::chunked_orc_writer_options opts =
-    cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath});
-  cudf_io::orc_chunked_writer(opts).write(*table1).write(*table2);
+  cudf::io::chunked_orc_writer_options opts =
+    cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath});
+  cudf::io::orc_chunked_writer(opts).write(*table1).write(*table2);
 
-  cudf_io::orc_reader_options read_opts =
-    cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}).stripes({{1, 0, 1}});
-  auto result = cudf_io::read_orc(read_opts);
+  cudf::io::orc_reader_options read_opts =
+    cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).stripes({{1, 0, 1}});
+  auto result = cudf::io::read_orc(read_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table);
 }
@@ -872,15 +871,15 @@ TEST_F(OrcChunkedWriterTest, ReadStripesError)
   auto table1 = create_random_fixed_table<int>(5, 5, true);
 
   auto filepath = temp_env->get_temp_filepath("ChunkedStripesError.orc");
-  cudf_io::chunked_orc_writer_options opts =
-    cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath});
-  cudf_io::orc_chunked_writer(opts).write(*table1);
+  cudf::io::chunked_orc_writer_options opts =
+    cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath});
+  cudf::io::orc_chunked_writer(opts).write(*table1);
 
-  cudf_io::orc_reader_options read_opts =
-    cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}).stripes({{0, 1}});
-  EXPECT_THROW(cudf_io::read_orc(read_opts), cudf::logic_error);
+  cudf::io::orc_reader_options read_opts =
+    cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).stripes({{0, 1}});
+  EXPECT_THROW(cudf::io::read_orc(read_opts), cudf::logic_error);
   read_opts.set_stripes({{-1}});
-  EXPECT_THROW(cudf_io::read_orc(read_opts), cudf::logic_error);
+  EXPECT_THROW(cudf::io::read_orc(read_opts), cudf::logic_error);
 }
 
 TYPED_TEST(OrcChunkedWriterNumericTypeTest, UnalignedSize)
@@ -915,13 +914,13 @@ TYPED_TEST(OrcChunkedWriterNumericTypeTest, UnalignedSize)
   auto expected = cudf::concatenate(std::vector<table_view>({tbl1, tbl2}));
 
   auto filepath = temp_env->get_temp_filepath("ChunkedUnalignedSize.orc");
-  cudf_io::chunked_orc_writer_options opts =
-    cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath});
-  cudf_io::orc_chunked_writer(opts).write(tbl1).write(tbl2);
+  cudf::io::chunked_orc_writer_options opts =
+    cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath});
+  cudf::io::orc_chunked_writer(opts).write(tbl1).write(tbl2);
 
-  cudf_io::orc_reader_options read_opts =
-    cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_orc(read_opts);
+  cudf::io::orc_reader_options read_opts =
+    cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_orc(read_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected);
 }
@@ -958,13 +957,13 @@ TYPED_TEST(OrcChunkedWriterNumericTypeTest, UnalignedSize2)
   auto expected = cudf::concatenate(std::vector<table_view>({tbl1, tbl2}));
 
   auto filepath = temp_env->get_temp_filepath("ChunkedUnalignedSize2.orc");
-  cudf_io::chunked_orc_writer_options opts =
-    cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info{filepath});
-  cudf_io::orc_chunked_writer(opts).write(tbl1).write(tbl2);
+  cudf::io::chunked_orc_writer_options opts =
+    cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info{filepath});
+  cudf::io::orc_chunked_writer(opts).write(tbl1).write(tbl2);
 
-  cudf_io::orc_reader_options read_opts =
-    cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_orc(read_opts);
+  cudf::io::orc_reader_options read_opts =
+    cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_orc(read_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected);
 }
@@ -1002,30 +1001,30 @@ TEST_F(OrcStatisticsTest, Basic)
 
   auto filepath = temp_env->get_temp_filepath("OrcStatsMerge.orc");
 
-  cudf_io::orc_writer_options out_opts =
-    cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected);
-  cudf_io::write_orc(out_opts);
+  cudf::io::orc_writer_options out_opts =
+    cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected);
+  cudf::io::write_orc(out_opts);
 
-  auto const stats = cudf_io::read_parsed_orc_statistics(cudf_io::source_info{filepath});
+  auto const stats = cudf::io::read_parsed_orc_statistics(cudf::io::source_info{filepath});
 
   auto const expected_column_names =
     std::vector<std::string>{"", "_col0", "_col1", "_col2", "_col3", "_col4"};
   EXPECT_EQ(stats.column_names, expected_column_names);
 
-  auto validate_statistics = [&](std::vector<cudf_io::column_statistics> const& stats) {
+  auto validate_statistics = [&](std::vector<cudf::io::column_statistics> const& stats) {
     auto& s0 = stats[0];
     EXPECT_EQ(*s0.number_of_values, 9ul);
 
     auto& s1 = stats[1];
     EXPECT_EQ(*s1.number_of_values, 4ul);
-    auto& ts1 = std::get<cudf_io::integer_statistics>(s1.type_specific_stats);
+    auto& ts1 = std::get<cudf::io::integer_statistics>(s1.type_specific_stats);
     EXPECT_EQ(*ts1.minimum, 1);
     EXPECT_EQ(*ts1.maximum, 7);
     EXPECT_EQ(*ts1.sum, 16);
 
     auto& s2 = stats[2];
     EXPECT_EQ(*s2.number_of_values, 4ul);
-    auto& ts2 = std::get<cudf_io::double_statistics>(s2.type_specific_stats);
+    auto& ts2 = std::get<cudf::io::double_statistics>(s2.type_specific_stats);
     EXPECT_EQ(*ts2.minimum, 1.);
     EXPECT_EQ(*ts2.maximum, 7.);
     // No sum ATM, filed #7087
@@ -1033,18 +1032,18 @@ TEST_F(OrcStatisticsTest, Basic)
 
     auto& s3 = stats[3];
     EXPECT_EQ(*s3.number_of_values, 9ul);
-    auto& ts3 = std::get<cudf_io::string_statistics>(s3.type_specific_stats);
+    auto& ts3 = std::get<cudf::io::string_statistics>(s3.type_specific_stats);
     EXPECT_EQ(*ts3.minimum, "Friday");
     EXPECT_EQ(*ts3.maximum, "Wednesday");
     EXPECT_EQ(*ts3.sum, 58ul);
 
     auto& s4 = stats[4];
     EXPECT_EQ(*s4.number_of_values, 9ul);
-    EXPECT_EQ(std::get<cudf_io::bucket_statistics>(s4.type_specific_stats).count[0], 8ul);
+    EXPECT_EQ(std::get<cudf::io::bucket_statistics>(s4.type_specific_stats).count[0], 8ul);
 
     auto& s5 = stats[5];
     EXPECT_EQ(*s5.number_of_values, 4ul);
-    auto& ts5 = std::get<cudf_io::timestamp_statistics>(s5.type_specific_stats);
+    auto& ts5 = std::get<cudf::io::timestamp_statistics>(s5.type_specific_stats);
     EXPECT_EQ(*ts5.minimum_utc, 1000);
     EXPECT_EQ(*ts5.maximum_utc, 7000);
     ASSERT_FALSE(ts5.minimum);
@@ -1070,18 +1069,18 @@ TEST_F(OrcWriterTest, SlicedValidMask)
   auto sliced_col = cudf::slice(static_cast<cudf::column_view>(col), indices);
   cudf::table_view tbl{sliced_col};
 
-  cudf_io::table_input_metadata expected_metadata(tbl);
+  cudf::io::table_input_metadata expected_metadata(tbl);
   expected_metadata.column_metadata[0].set_name("col_string");
 
   auto filepath = temp_env->get_temp_filepath("OrcStrings.orc");
-  cudf_io::orc_writer_options out_opts =
-    cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, tbl)
+  cudf::io::orc_writer_options out_opts =
+    cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, tbl)
       .metadata(&expected_metadata);
-  cudf_io::write_orc(out_opts);
+  cudf::io::write_orc(out_opts);
 
-  cudf_io::orc_reader_options in_opts =
-    cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}).use_index(false);
-  auto result = cudf_io::read_orc(in_opts);
+  cudf::io::orc_reader_options in_opts =
+    cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false);
+  auto result = cudf::io::read_orc(in_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(tbl, result.tbl->view());
   cudf::test::expect_metadata_equal(expected_metadata, result.metadata);
@@ -1093,13 +1092,13 @@ TEST_F(OrcReaderTest, SingleInputs)
   auto table1 = create_random_fixed_table<int>(5, 5, true);
 
   auto filepath1 = temp_env->get_temp_filepath("SimpleTable1.orc");
-  cudf_io::orc_writer_options write_opts =
-    cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath1}, table1->view());
-  cudf_io::write_orc(write_opts);
+  cudf::io::orc_writer_options write_opts =
+    cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath1}, table1->view());
+  cudf::io::write_orc(write_opts);
 
-  cudf_io::orc_reader_options read_opts =
-    cudf_io::orc_reader_options::builder(cudf_io::source_info{{filepath1}});
-  auto result = cudf_io::read_orc(read_opts);
+  cudf::io::orc_reader_options read_opts =
+    cudf::io::orc_reader_options::builder(cudf::io::source_info{{filepath1}});
+  auto result = cudf::io::read_orc(read_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *table1);
 }
@@ -1125,11 +1124,11 @@ TEST_F(OrcReaderTest, zstdCompressionRegression)
 
   auto source =
     cudf::io::source_info(reinterpret_cast<const char*>(input_buffer), sizeof(input_buffer));
-  cudf_io::orc_reader_options in_opts =
-    cudf_io::orc_reader_options::builder(source).use_index(false);
+  cudf::io::orc_reader_options in_opts =
+    cudf::io::orc_reader_options::builder(source).use_index(false);
 
   cudf::io::table_with_metadata result;
-  CUDF_EXPECT_NO_THROW(result = cudf_io::read_orc(in_opts));
+  CUDF_EXPECT_NO_THROW(result = cudf::io::read_orc(in_opts));
   EXPECT_EQ(1920800, result.tbl->num_rows());
 }
 
@@ -1143,21 +1142,21 @@ TEST_F(OrcReaderTest, MultipleInputs)
 
   auto const filepath1 = temp_env->get_temp_filepath("SimpleTable1.orc");
   {
-    cudf_io::orc_writer_options out_opts =
-      cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath1}, table1->view());
-    cudf_io::write_orc(out_opts);
+    cudf::io::orc_writer_options out_opts =
+      cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath1}, table1->view());
+    cudf::io::write_orc(out_opts);
   }
 
   auto const filepath2 = temp_env->get_temp_filepath("SimpleTable2.orc");
   {
-    cudf_io::orc_writer_options out_opts =
-      cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath2}, table2->view());
-    cudf_io::write_orc(out_opts);
+    cudf::io::orc_writer_options out_opts =
+      cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath2}, table2->view());
+    cudf::io::write_orc(out_opts);
   }
 
-  cudf_io::orc_reader_options read_opts =
-    cudf_io::orc_reader_options::builder(cudf_io::source_info{{filepath1, filepath2}});
-  auto result = cudf_io::read_orc(read_opts);
+  cudf::io::orc_reader_options read_opts =
+    cudf::io::orc_reader_options::builder(cudf::io::source_info{{filepath1, filepath2}});
+  auto result = cudf::io::read_orc(read_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table);
 }
@@ -1180,14 +1179,14 @@ TEST_P(OrcWriterTestDecimal, Decimal64)
   cudf::table_view tbl({static_cast<cudf::column_view>(col)});
 
   auto filepath = temp_env->get_temp_filepath("Decimal64.orc");
-  cudf_io::orc_writer_options out_opts =
-    cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, tbl);
+  cudf::io::orc_writer_options out_opts =
+    cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, tbl);
 
-  cudf_io::write_orc(out_opts);
+  cudf::io::write_orc(out_opts);
 
-  cudf_io::orc_reader_options in_opts =
-    cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_orc(in_opts);
+  cudf::io::orc_reader_options in_opts =
+    cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_orc(in_opts);
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(tbl.column(0), result.tbl->view().column(0));
 }
@@ -1211,14 +1210,14 @@ TEST_F(OrcWriterTest, Decimal32)
   cudf::table_view expected({col});
 
   auto filepath = temp_env->get_temp_filepath("Decimal32.orc");
-  cudf_io::orc_writer_options out_opts =
-    cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected);
+  cudf::io::orc_writer_options out_opts =
+    cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected);
 
-  cudf_io::write_orc(out_opts);
+  cudf::io::write_orc(out_opts);
 
-  cudf_io::orc_reader_options in_opts =
-    cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_orc(in_opts);
+  cudf::io::orc_reader_options in_opts =
+    cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_orc(in_opts);
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(col, result.tbl->view().column(0));
 }
@@ -1248,15 +1247,15 @@ TEST_F(OrcStatisticsTest, Overflow)
 
   auto filepath = temp_env->get_temp_filepath("OrcStatsOverflow.orc");
 
-  cudf_io::orc_writer_options out_opts =
-    cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, tbl);
-  cudf_io::write_orc(out_opts);
+  cudf::io::orc_writer_options out_opts =
+    cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, tbl);
+  cudf::io::write_orc(out_opts);
 
-  auto const stats = cudf_io::read_parsed_orc_statistics(cudf_io::source_info{filepath});
+  auto const stats = cudf::io::read_parsed_orc_statistics(cudf::io::source_info{filepath});
 
   auto check_sum_exist = [&](int idx, bool expected) {
     auto const& s  = stats.file_stats[idx];
-    auto const& ts = std::get<cudf_io::integer_statistics>(s.type_specific_stats);
+    auto const& ts = std::get<cudf::io::integer_statistics>(s.type_specific_stats);
     EXPECT_EQ(ts.sum.has_value(), expected);
   };
   check_sum_exist(1, false);
@@ -1311,8 +1310,8 @@ TEST_F(OrcStatisticsTest, HasNull)
     0x4F, 0x52, 0x43, 0x17,
   };
 
-  auto const stats = cudf_io::read_parsed_orc_statistics(
-    cudf_io::source_info{reinterpret_cast<char const*>(nulls_orc.data()), nulls_orc.size()});
+  auto const stats = cudf::io::read_parsed_orc_statistics(
+    cudf::io::source_info{reinterpret_cast<char const*>(nulls_orc.data()), nulls_orc.size()});
 
   EXPECT_EQ(stats.file_stats[1].has_null, true);
   EXPECT_EQ(stats.file_stats[2].has_null, false);
@@ -1343,35 +1342,35 @@ TEST_P(OrcWriterTestStripes, StripeSize)
   auto validate = [&](std::vector<char> const& orc_buffer) {
     auto const expected_stripe_num =
       std::max<cudf::size_type>(num_rows / size_rows, (num_rows * sizeof(int64_t)) / size_bytes);
-    auto const stats = cudf_io::read_parsed_orc_statistics(
-      cudf_io::source_info(orc_buffer.data(), orc_buffer.size()));
+    auto const stats = cudf::io::read_parsed_orc_statistics(
+      cudf::io::source_info(orc_buffer.data(), orc_buffer.size()));
     EXPECT_EQ(stats.stripes_stats.size(), expected_stripe_num);
 
-    cudf_io::orc_reader_options in_opts =
-      cudf_io::orc_reader_options::builder(
-        cudf_io::source_info(orc_buffer.data(), orc_buffer.size()))
+    cudf::io::orc_reader_options in_opts =
+      cudf::io::orc_reader_options::builder(
+        cudf::io::source_info(orc_buffer.data(), orc_buffer.size()))
         .use_index(false);
-    auto result = cudf_io::read_orc(in_opts);
+    auto result = cudf::io::read_orc(in_opts);
 
     CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result.tbl->view());
   };
 
   {
     std::vector<char> out_buffer_chunked;
-    cudf_io::chunked_orc_writer_options opts =
-      cudf_io::chunked_orc_writer_options::builder(cudf_io::sink_info(&out_buffer_chunked))
+    cudf::io::chunked_orc_writer_options opts =
+      cudf::io::chunked_orc_writer_options::builder(cudf::io::sink_info(&out_buffer_chunked))
         .stripe_size_rows(size_rows)
         .stripe_size_bytes(size_bytes);
-    cudf_io::orc_chunked_writer(opts).write(expected->view());
+    cudf::io::orc_chunked_writer(opts).write(expected->view());
     validate(out_buffer_chunked);
   }
   {
     std::vector<char> out_buffer;
-    cudf_io::orc_writer_options out_opts =
-      cudf_io::orc_writer_options::builder(cudf_io::sink_info(&out_buffer), expected->view())
+    cudf::io::orc_writer_options out_opts =
+      cudf::io::orc_writer_options::builder(cudf::io::sink_info(&out_buffer), expected->view())
         .stripe_size_rows(size_rows)
         .stripe_size_bytes(size_bytes);
-    cudf_io::write_orc(out_opts);
+    cudf::io::write_orc(out_opts);
     validate(out_buffer);
   }
 }
@@ -1392,15 +1391,15 @@ TEST_F(OrcWriterTest, StripeSizeInvalid)
   std::vector<char> out_buffer;
 
   EXPECT_THROW(
-    cudf_io::orc_writer_options::builder(cudf_io::sink_info(&out_buffer), unused_table->view())
+    cudf::io::orc_writer_options::builder(cudf::io::sink_info(&out_buffer), unused_table->view())
       .stripe_size_rows(511),
     cudf::logic_error);
   EXPECT_THROW(
-    cudf_io::orc_writer_options::builder(cudf_io::sink_info(&out_buffer), unused_table->view())
+    cudf::io::orc_writer_options::builder(cudf::io::sink_info(&out_buffer), unused_table->view())
       .stripe_size_bytes(63 << 10),
     cudf::logic_error);
   EXPECT_THROW(
-    cudf_io::orc_writer_options::builder(cudf_io::sink_info(&out_buffer), unused_table->view())
+    cudf::io::orc_writer_options::builder(cudf::io::sink_info(&out_buffer), unused_table->view())
       .row_index_stride(511),
     cudf::logic_error);
 }
@@ -1438,18 +1437,18 @@ TEST_F(OrcWriterTest, TestMap)
 
   table_view expected({*list_col});
 
-  cudf_io::table_input_metadata expected_metadata(expected);
+  cudf::io::table_input_metadata expected_metadata(expected);
   expected_metadata.column_metadata[0].set_list_column_as_map();
 
   auto filepath = temp_env->get_temp_filepath("MapColumn.orc");
-  cudf_io::orc_writer_options out_opts =
-    cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected)
+  cudf::io::orc_writer_options out_opts =
+    cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected)
       .metadata(&expected_metadata);
-  cudf_io::write_orc(out_opts);
+  cudf::io::write_orc(out_opts);
 
-  cudf_io::orc_reader_options in_opts =
-    cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}).use_index(false);
-  auto result = cudf_io::read_orc(in_opts);
+  cudf::io::orc_reader_options in_opts =
+    cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath}).use_index(false);
+  auto result = cudf::io::read_orc(in_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view());
   cudf::test::expect_metadata_equal(expected_metadata, result.metadata);
@@ -1466,22 +1465,22 @@ TEST_F(OrcReaderTest, NestedColumnSelection)
   struct_col s_col{child_col1, child_col2};
   table_view expected({s_col});
 
-  cudf_io::table_input_metadata expected_metadata(expected);
+  cudf::io::table_input_metadata expected_metadata(expected);
   expected_metadata.column_metadata[0].set_name("struct_s");
   expected_metadata.column_metadata[0].child(0).set_name("field_a");
   expected_metadata.column_metadata[0].child(1).set_name("field_b");
 
   auto filepath = temp_env->get_temp_filepath("OrcNestedSelection.orc");
-  cudf_io::orc_writer_options out_opts =
-    cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected)
+  cudf::io::orc_writer_options out_opts =
+    cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected)
       .metadata(&expected_metadata);
-  cudf_io::write_orc(out_opts);
+  cudf::io::write_orc(out_opts);
 
-  cudf_io::orc_reader_options in_opts =
-    cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::orc_reader_options in_opts =
+    cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath})
       .use_index(false)
       .columns({"struct_s.field_b"});
-  auto result = cudf_io::read_orc(in_opts);
+  auto result = cudf::io::read_orc(in_opts);
 
   // Verify that only one child column is included in the output table
   ASSERT_EQ(1, result.tbl->view().column(0).num_children());
@@ -1503,20 +1502,20 @@ TEST_F(OrcReaderTest, DecimalOptions)
   dec128_col col{col_data, col_data + num_rows, mask};
   table_view expected({col});
 
-  cudf_io::table_input_metadata expected_metadata(expected);
+  cudf::io::table_input_metadata expected_metadata(expected);
   expected_metadata.column_metadata[0].set_name("dec");
 
   auto filepath = temp_env->get_temp_filepath("OrcDecimalOptions.orc");
-  cudf_io::orc_writer_options out_opts =
-    cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected)
+  cudf::io::orc_writer_options out_opts =
+    cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected)
       .metadata(&expected_metadata);
-  cudf_io::write_orc(out_opts);
+  cudf::io::write_orc(out_opts);
 
-  cudf_io::orc_reader_options valid_opts =
-    cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::orc_reader_options valid_opts =
+    cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath})
       .decimal128_columns({"dec", "fake_name"});
   // Should not throw, even with "fake name"
-  EXPECT_NO_THROW(cudf_io::read_orc(valid_opts));
+  EXPECT_NO_THROW(cudf::io::read_orc(valid_opts));
 }
 
 TEST_F(OrcWriterTest, DecimalOptionsNested)
@@ -1547,24 +1546,24 @@ TEST_F(OrcWriterTest, DecimalOptionsNested)
 
   table_view expected({*map_list_col});
 
-  cudf_io::table_input_metadata expected_metadata(expected);
+  cudf::io::table_input_metadata expected_metadata(expected);
   expected_metadata.column_metadata[0].set_name("maps");
   expected_metadata.column_metadata[0].set_list_column_as_map();
   expected_metadata.column_metadata[0].child(1).child(0).child(0).set_name("dec64");
   expected_metadata.column_metadata[0].child(1).child(0).child(1).set_name("dec128");
 
   auto filepath = temp_env->get_temp_filepath("OrcMultiColumn.orc");
-  cudf_io::orc_writer_options out_opts =
-    cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected)
+  cudf::io::orc_writer_options out_opts =
+    cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected)
       .metadata(&expected_metadata);
-  cudf_io::write_orc(out_opts);
+  cudf::io::write_orc(out_opts);
 
-  cudf_io::orc_reader_options in_opts =
-    cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::orc_reader_options in_opts =
+    cudf::io::orc_reader_options::builder(cudf::io::source_info{filepath})
       .use_index(false)
       // One less level of nesting because children of map columns are the child struct's children
       .decimal128_columns({"maps.0.dec64"});
-  auto result = cudf_io::read_orc(in_opts);
+  auto result = cudf::io::read_orc(in_opts);
 
   // Both columns should be read as decimal128
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result.tbl->view().column(0).child(1).child(0).child(0),
@@ -1577,14 +1576,15 @@ TEST_F(OrcReaderTest, EmptyColumnsParam)
   auto const expected = create_random_fixed_table<int>(2, 4, false);
 
   std::vector<char> out_buffer;
-  cudf_io::orc_writer_options args =
-    cudf_io::orc_writer_options::builder(cudf_io::sink_info{&out_buffer}, *expected);
-  cudf_io::write_orc(args);
+  cudf::io::orc_writer_options args =
+    cudf::io::orc_writer_options::builder(cudf::io::sink_info{&out_buffer}, *expected);
+  cudf::io::write_orc(args);
 
-  cudf_io::orc_reader_options read_opts =
-    cudf_io::orc_reader_options::builder(cudf_io::source_info{out_buffer.data(), out_buffer.size()})
+  cudf::io::orc_reader_options read_opts =
+    cudf::io::orc_reader_options::builder(
+      cudf::io::source_info{out_buffer.data(), out_buffer.size()})
       .columns({});
-  auto const result = cudf_io::read_orc(read_opts);
+  auto const result = cudf::io::read_orc(read_opts);
 
   EXPECT_EQ(result.tbl->num_columns(), 0);
   EXPECT_EQ(result.tbl->num_rows(), 0);
diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp
index cf22ab8a525..8a98efabcb5 100644
--- a/cpp/tests/io/parquet_test.cpp
+++ b/cpp/tests/io/parquet_test.cpp
@@ -45,8 +45,6 @@
 #include <fstream>
 #include <type_traits>
 
-namespace cudf_io = cudf::io;
-
 template <typename T, typename SourceElementT = T>
 using column_wrapper =
   typename std::conditional<std::is_same_v<T, cudf::string_view>,
@@ -430,13 +428,13 @@ TYPED_TEST(ParquetWriterNumericTypeTest, SingleColumn)
   auto expected = table_view{{col}};
 
   auto filepath = temp_env->get_temp_filepath("SingleColumn.parquet");
-  cudf_io::parquet_writer_options out_opts =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected);
-  cudf_io::write_parquet(out_opts);
+  cudf::io::parquet_writer_options out_opts =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected);
+  cudf::io::write_parquet(out_opts);
 
-  cudf_io::parquet_reader_options in_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_parquet(in_opts);
+  cudf::io::parquet_reader_options in_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_parquet(in_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view());
 }
@@ -453,13 +451,13 @@ TYPED_TEST(ParquetWriterNumericTypeTest, SingleColumnWithNulls)
   auto expected = table_view{{col}};
 
   auto filepath = temp_env->get_temp_filepath("SingleColumnWithNulls.parquet");
-  cudf_io::parquet_writer_options out_opts =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected);
-  cudf_io::write_parquet(out_opts);
+  cudf::io::parquet_writer_options out_opts =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected);
+  cudf::io::write_parquet(out_opts);
 
-  cudf_io::parquet_reader_options in_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_parquet(in_opts);
+  cudf::io::parquet_reader_options in_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_parquet(in_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view());
 }
@@ -477,14 +475,14 @@ TYPED_TEST(ParquetWriterChronoTypeTest, Chronos)
   auto expected = table_view{{col}};
 
   auto filepath = temp_env->get_temp_filepath("Chronos.parquet");
-  cudf_io::parquet_writer_options out_opts =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected);
-  cudf_io::write_parquet(out_opts);
+  cudf::io::parquet_writer_options out_opts =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected);
+  cudf::io::write_parquet(out_opts);
 
-  cudf_io::parquet_reader_options in_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::parquet_reader_options in_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath})
       .timestamp_type(this->type());
-  auto result = cudf_io::read_parquet(in_opts);
+  auto result = cudf::io::read_parquet(in_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view());
 }
@@ -503,14 +501,14 @@ TYPED_TEST(ParquetWriterChronoTypeTest, ChronosWithNulls)
   auto expected = table_view{{col}};
 
   auto filepath = temp_env->get_temp_filepath("ChronosWithNulls.parquet");
-  cudf_io::parquet_writer_options out_opts =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected);
-  cudf_io::write_parquet(out_opts);
+  cudf::io::parquet_writer_options out_opts =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected);
+  cudf::io::write_parquet(out_opts);
 
-  cudf_io::parquet_reader_options in_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::parquet_reader_options in_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath})
       .timestamp_type(this->type());
-  auto result = cudf_io::read_parquet(in_opts);
+  auto result = cudf::io::read_parquet(in_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view());
 }
@@ -527,14 +525,14 @@ TYPED_TEST(ParquetWriterTimestampTypeTest, TimestampOverflow)
   table_view expected({col});
 
   auto filepath = temp_env->get_temp_filepath("ParquetTimestampOverflow.parquet");
-  cudf_io::parquet_writer_options out_opts =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected);
-  cudf_io::write_parquet(out_opts);
+  cudf::io::parquet_writer_options out_opts =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected);
+  cudf::io::write_parquet(out_opts);
 
-  cudf_io::parquet_reader_options in_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::parquet_reader_options in_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath})
       .timestamp_type(this->type());
-  auto result = cudf_io::read_parquet(in_opts);
+  auto result = cudf::io::read_parquet(in_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view());
 }
@@ -576,7 +574,7 @@ TEST_F(ParquetWriterTest, MultiColumn)
 
   auto expected = table_view{{col1, col2, col3, col4, col5, col6, col7, col8}};
 
-  cudf_io::table_input_metadata expected_metadata(expected);
+  cudf::io::table_input_metadata expected_metadata(expected);
   // expected_metadata.column_metadata[0].set_name( "bools");
   expected_metadata.column_metadata[0].set_name("int8s");
   expected_metadata.column_metadata[1].set_name("int16s");
@@ -588,14 +586,14 @@ TEST_F(ParquetWriterTest, MultiColumn)
   expected_metadata.column_metadata[7].set_name("decimal128s").set_decimal_precision(40);
 
   auto filepath = temp_env->get_temp_filepath("MultiColumn.parquet");
-  cudf_io::parquet_writer_options out_opts =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected)
+  cudf::io::parquet_writer_options out_opts =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected)
       .metadata(&expected_metadata);
-  cudf_io::write_parquet(out_opts);
+  cudf::io::write_parquet(out_opts);
 
-  cudf_io::parquet_reader_options in_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_parquet(in_opts);
+  cudf::io::parquet_reader_options in_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_parquet(in_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view());
   cudf::test::expect_metadata_equal(expected_metadata, result.metadata);
@@ -647,7 +645,7 @@ TEST_F(ParquetWriterTest, MultiColumnWithNulls)
 
   auto expected = table_view{{/*col0, */ col1, col2, col3, col4, col5, col6, col7}};
 
-  cudf_io::table_input_metadata expected_metadata(expected);
+  cudf::io::table_input_metadata expected_metadata(expected);
   // expected_metadata.column_names.emplace_back("bools");
   expected_metadata.column_metadata[0].set_name("int8s");
   expected_metadata.column_metadata[1].set_name("int16s");
@@ -658,15 +656,15 @@ TEST_F(ParquetWriterTest, MultiColumnWithNulls)
   expected_metadata.column_metadata[6].set_name("decimal64s").set_decimal_precision(20);
 
   auto filepath = temp_env->get_temp_filepath("MultiColumnWithNulls.parquet");
-  cudf_io::parquet_writer_options out_opts =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected)
+  cudf::io::parquet_writer_options out_opts =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected)
       .metadata(&expected_metadata);
 
-  cudf_io::write_parquet(out_opts);
+  cudf::io::write_parquet(out_opts);
 
-  cudf_io::parquet_reader_options in_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_parquet(in_opts);
+  cudf::io::parquet_reader_options in_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_parquet(in_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view());
   // TODO: Need to be able to return metadata in tree form from reader so they can be compared.
@@ -691,20 +689,20 @@ TEST_F(ParquetWriterTest, Strings)
 
   auto expected = table_view{{col0, col1, col2}};
 
-  cudf_io::table_input_metadata expected_metadata(expected);
+  cudf::io::table_input_metadata expected_metadata(expected);
   expected_metadata.column_metadata[0].set_name("col_other");
   expected_metadata.column_metadata[1].set_name("col_string");
   expected_metadata.column_metadata[2].set_name("col_another");
 
   auto filepath = temp_env->get_temp_filepath("Strings.parquet");
-  cudf_io::parquet_writer_options out_opts =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected)
+  cudf::io::parquet_writer_options out_opts =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected)
       .metadata(&expected_metadata);
-  cudf_io::write_parquet(out_opts);
+  cudf::io::write_parquet(out_opts);
 
-  cudf_io::parquet_reader_options in_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_parquet(in_opts);
+  cudf::io::parquet_reader_options in_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_parquet(in_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view());
   cudf::test::expect_metadata_equal(expected_metadata, result.metadata);
@@ -740,7 +738,7 @@ TEST_F(ParquetWriterTest, StringsAsBinary)
 
   auto write_tbl = table_view{{col0, col1, col2, col3, col4}};
 
-  cudf_io::table_input_metadata expected_metadata(write_tbl);
+  cudf::io::table_input_metadata expected_metadata(write_tbl);
   expected_metadata.column_metadata[0].set_name("col_single").set_output_as_binary(true);
   expected_metadata.column_metadata[1].set_name("col_string").set_output_as_binary(true);
   expected_metadata.column_metadata[2].set_name("col_another").set_output_as_binary(true);
@@ -748,20 +746,20 @@ TEST_F(ParquetWriterTest, StringsAsBinary)
   expected_metadata.column_metadata[4].set_name("col_binary");
 
   auto filepath = temp_env->get_temp_filepath("BinaryStrings.parquet");
-  cudf_io::parquet_writer_options out_opts =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, write_tbl)
+  cudf::io::parquet_writer_options out_opts =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, write_tbl)
       .metadata(&expected_metadata);
-  cudf_io::write_parquet(out_opts);
+  cudf::io::write_parquet(out_opts);
 
-  cudf_io::parquet_reader_options in_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::parquet_reader_options in_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath})
       .set_column_schema(
-        {cudf_io::reader_column_schema().set_convert_binary_to_strings(false),
-         cudf_io::reader_column_schema().set_convert_binary_to_strings(false),
-         cudf_io::reader_column_schema().set_convert_binary_to_strings(false),
-         cudf_io::reader_column_schema().add_child(cudf_io::reader_column_schema()),
-         cudf_io::reader_column_schema().add_child(cudf_io::reader_column_schema())});
-  auto result   = cudf_io::read_parquet(in_opts);
+        {cudf::io::reader_column_schema().set_convert_binary_to_strings(false),
+         cudf::io::reader_column_schema().set_convert_binary_to_strings(false),
+         cudf::io::reader_column_schema().set_convert_binary_to_strings(false),
+         cudf::io::reader_column_schema().add_child(cudf::io::reader_column_schema()),
+         cudf::io::reader_column_schema().add_child(cudf::io::reader_column_schema())});
+  auto result   = cudf::io::read_parquet(in_opts);
   auto expected = table_view{{col3, col4, col3, col3, col4}};
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view());
@@ -855,7 +853,7 @@ TEST_F(ParquetWriterTest, SlicedTable)
   // auto expected_slice = expected;
   auto expected_slice = cudf::slice(expected, {2, static_cast<cudf::size_type>(num_rows) - 1});
 
-  cudf_io::table_input_metadata expected_metadata(expected_slice);
+  cudf::io::table_input_metadata expected_metadata(expected_slice);
   expected_metadata.column_metadata[0].set_name("col_other");
   expected_metadata.column_metadata[1].set_name("col_string");
   expected_metadata.column_metadata[2].set_name("col_another");
@@ -869,14 +867,14 @@ TEST_F(ParquetWriterTest, SlicedTable)
   expected_metadata.column_metadata[6].child(1).child(1).set_name("flats");
 
   auto filepath = temp_env->get_temp_filepath("SlicedTable.parquet");
-  cudf_io::parquet_writer_options out_opts =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected_slice)
+  cudf::io::parquet_writer_options out_opts =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected_slice)
       .metadata(&expected_metadata);
-  cudf_io::write_parquet(out_opts);
+  cudf::io::write_parquet(out_opts);
 
-  cudf_io::parquet_reader_options in_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_parquet(in_opts);
+  cudf::io::parquet_reader_options in_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_parquet(in_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected_slice, result.tbl->view());
   cudf::test::expect_metadata_equal(expected_metadata, result.metadata);
@@ -956,7 +954,7 @@ TEST_F(ParquetWriterTest, ListColumn)
 
   table_view expected({col0, col1, col2, col3, /* col4, */ col5, col6, col7});
 
-  cudf_io::table_input_metadata expected_metadata(expected);
+  cudf::io::table_input_metadata expected_metadata(expected);
   expected_metadata.column_metadata[0].set_name("col_list_int_0");
   expected_metadata.column_metadata[1].set_name("col_list_list_int_1");
   expected_metadata.column_metadata[2].set_name("col_list_list_int_nullable_2");
@@ -967,14 +965,14 @@ TEST_F(ParquetWriterTest, ListColumn)
   expected_metadata.column_metadata[6].set_name("col_list_list_list_7");
 
   auto filepath = temp_env->get_temp_filepath("ListColumn.parquet");
-  auto out_opts = cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected)
+  auto out_opts = cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected)
                     .metadata(&expected_metadata)
-                    .compression(cudf_io::compression_type::NONE);
+                    .compression(cudf::io::compression_type::NONE);
 
-  cudf_io::write_parquet(out_opts);
+  cudf::io::write_parquet(out_opts);
 
-  auto in_opts = cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath});
-  auto result  = cudf_io::read_parquet(in_opts);
+  auto in_opts = cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
+  auto result  = cudf::io::read_parquet(in_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view());
   cudf::test::expect_metadata_equal(expected_metadata, result.metadata);
@@ -999,7 +997,7 @@ TEST_F(ParquetWriterTest, MultiIndex)
 
   auto expected = table_view{{col0, col1, col2, col3, col4}};
 
-  cudf_io::table_input_metadata expected_metadata(expected);
+  cudf::io::table_input_metadata expected_metadata(expected);
   expected_metadata.column_metadata[0].set_name("int8s");
   expected_metadata.column_metadata[1].set_name("int16s");
   expected_metadata.column_metadata[2].set_name("int32s");
@@ -1007,18 +1005,18 @@ TEST_F(ParquetWriterTest, MultiIndex)
   expected_metadata.column_metadata[4].set_name("doubles");
 
   auto filepath = temp_env->get_temp_filepath("MultiIndex.parquet");
-  cudf_io::parquet_writer_options out_opts =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected)
+  cudf::io::parquet_writer_options out_opts =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected)
       .metadata(&expected_metadata)
       .key_value_metadata(
         {{{"pandas", "\"index_columns\": [\"int8s\", \"int16s\"], \"column1\": [\"int32s\"]"}}});
-  cudf_io::write_parquet(out_opts);
+  cudf::io::write_parquet(out_opts);
 
-  cudf_io::parquet_reader_options in_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::parquet_reader_options in_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath})
       .use_pandas_metadata(true)
       .columns({"int32s", "floats", "doubles"});
-  auto result = cudf_io::read_parquet(in_opts);
+  auto result = cudf::io::read_parquet(in_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view());
   cudf::test::expect_metadata_equal(expected_metadata, result.metadata);
@@ -1034,17 +1032,17 @@ TEST_F(ParquetWriterTest, HostBuffer)
 
   const auto expected = table_view{{col}};
 
-  cudf_io::table_input_metadata expected_metadata(expected);
+  cudf::io::table_input_metadata expected_metadata(expected);
   expected_metadata.column_metadata[0].set_name("col_other");
 
   std::vector<char> out_buffer;
-  cudf_io::parquet_writer_options out_opts =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info(&out_buffer), expected)
+  cudf::io::parquet_writer_options out_opts =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer), expected)
       .metadata(&expected_metadata);
-  cudf_io::write_parquet(out_opts);
-  cudf_io::parquet_reader_options in_opts = cudf_io::parquet_reader_options::builder(
-    cudf_io::source_info(out_buffer.data(), out_buffer.size()));
-  const auto result = cudf_io::read_parquet(in_opts);
+  cudf::io::write_parquet(out_opts);
+  cudf::io::parquet_reader_options in_opts = cudf::io::parquet_reader_options::builder(
+    cudf::io::source_info(out_buffer.data(), out_buffer.size()));
+  const auto result = cudf::io::read_parquet(in_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view());
   cudf::test::expect_metadata_equal(expected_metadata, result.metadata);
@@ -1056,13 +1054,13 @@ TEST_F(ParquetWriterTest, NonNullable)
   auto expected = create_random_fixed_table<int>(9, 9, false);
 
   auto filepath = temp_env->get_temp_filepath("NonNullable.parquet");
-  cudf_io::parquet_writer_options args =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, *expected);
-  cudf_io::write_parquet(args);
+  cudf::io::parquet_writer_options args =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected);
+  cudf::io::write_parquet(args);
 
-  cudf_io::parquet_reader_options read_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_parquet(read_opts);
+  cudf::io::parquet_reader_options read_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_parquet(read_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected);
 }
@@ -1095,13 +1093,13 @@ TEST_F(ParquetWriterTest, Struct)
   auto expected = table_view({*struct_2});
 
   auto filepath = temp_env->get_temp_filepath("Struct.parquet");
-  cudf_io::parquet_writer_options args =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected);
-  cudf_io::write_parquet(args);
+  cudf::io::parquet_writer_options args =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected);
+  cudf::io::write_parquet(args);
 
-  cudf_io::parquet_reader_options read_args =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath));
-  cudf_io::read_parquet(read_args);
+  cudf::io::parquet_reader_options read_args =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath));
+  cudf::io::read_parquet(read_args);
 }
 
 TEST_F(ParquetWriterTest, StructOfList)
@@ -1156,7 +1154,7 @@ TEST_F(ParquetWriterTest, StructOfList)
 
   auto expected = table_view({*struct_2});
 
-  cudf_io::table_input_metadata expected_metadata(expected);
+  cudf::io::table_input_metadata expected_metadata(expected);
   expected_metadata.column_metadata[0].set_name("being");
   expected_metadata.column_metadata[0].child(0).set_name("human?");
   expected_metadata.column_metadata[0].child(1).set_name("particulars");
@@ -1166,14 +1164,14 @@ TEST_F(ParquetWriterTest, StructOfList)
   expected_metadata.column_metadata[0].child(1).child(3).set_name("flats");
 
   auto filepath = temp_env->get_temp_filepath("StructOfList.parquet");
-  cudf_io::parquet_writer_options args =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected)
+  cudf::io::parquet_writer_options args =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected)
       .metadata(&expected_metadata);
-  cudf_io::write_parquet(args);
+  cudf::io::write_parquet(args);
 
-  cudf_io::parquet_reader_options read_args =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath));
-  const auto result = cudf_io::read_parquet(read_args);
+  cudf::io::parquet_reader_options read_args =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath));
+  const auto result = cudf::io::read_parquet(read_args);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view());
   cudf::test::expect_metadata_equal(expected_metadata, result.metadata);
@@ -1213,7 +1211,7 @@ TEST_F(ParquetWriterTest, ListOfStruct)
 
   auto expected = table_view({*list_col});
 
-  cudf_io::table_input_metadata expected_metadata(expected);
+  cudf::io::table_input_metadata expected_metadata(expected);
   expected_metadata.column_metadata[0].set_name("family");
   expected_metadata.column_metadata[0].child(1).child(0).set_name("human?");
   expected_metadata.column_metadata[0].child(1).child(1).set_name("particulars");
@@ -1221,14 +1219,14 @@ TEST_F(ParquetWriterTest, ListOfStruct)
   expected_metadata.column_metadata[0].child(1).child(1).child(1).set_name("age");
 
   auto filepath = temp_env->get_temp_filepath("ListOfStruct.parquet");
-  cudf_io::parquet_writer_options args =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected)
+  cudf::io::parquet_writer_options args =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected)
       .metadata(&expected_metadata);
-  cudf_io::write_parquet(args);
+  cudf::io::write_parquet(args);
 
-  cudf_io::parquet_reader_options read_args =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath));
-  const auto result = cudf_io::read_parquet(read_args);
+  cudf::io::parquet_reader_options read_args =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath));
+  const auto result = cudf::io::read_parquet(read_args);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view());
   cudf::test::expect_metadata_equal(expected_metadata, result.metadata);
@@ -1284,36 +1282,34 @@ TEST_F(ParquetWriterTest, CustomDataSink)
   auto filepath = temp_env->get_temp_filepath("CustomDataSink.parquet");
   custom_test_data_sink custom_sink(filepath);
 
-  namespace cudf_io = cudf::io;
-
   srand(31337);
   auto expected = create_random_fixed_table<int>(5, 10, false);
 
   // write out using the custom sink
   {
-    cudf_io::parquet_writer_options args =
-      cudf_io::parquet_writer_options::builder(cudf_io::sink_info{&custom_sink}, *expected);
-    cudf_io::write_parquet(args);
+    cudf::io::parquet_writer_options args =
+      cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected);
+    cudf::io::write_parquet(args);
   }
 
   // write out using a memmapped sink
   std::vector<char> buf_sink;
   {
-    cudf_io::parquet_writer_options args =
-      cudf_io::parquet_writer_options::builder(cudf_io::sink_info{&buf_sink}, *expected);
-    cudf_io::write_parquet(args);
+    cudf::io::parquet_writer_options args =
+      cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&buf_sink}, *expected);
+    cudf::io::write_parquet(args);
   }
 
   // read them back in and make sure everything matches
 
-  cudf_io::parquet_reader_options custom_args =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath});
-  auto custom_tbl = cudf_io::read_parquet(custom_args);
+  cudf::io::parquet_reader_options custom_args =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
+  auto custom_tbl = cudf::io::read_parquet(custom_args);
   CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view());
 
-  cudf_io::parquet_reader_options buf_args = cudf_io::parquet_reader_options::builder(
-    cudf_io::source_info{buf_sink.data(), buf_sink.size()});
-  auto buf_tbl = cudf_io::read_parquet(buf_args);
+  cudf::io::parquet_reader_options buf_args = cudf::io::parquet_reader_options::builder(
+    cudf::io::source_info{buf_sink.data(), buf_sink.size()});
+  auto buf_tbl = cudf::io::read_parquet(buf_args);
   CUDF_TEST_EXPECT_TABLES_EQUAL(buf_tbl.tbl->view(), expected->view());
 }
 
@@ -1322,20 +1318,18 @@ TEST_F(ParquetWriterTest, DeviceWriteLargeishFile)
   auto filepath = temp_env->get_temp_filepath("DeviceWriteLargeishFile.parquet");
   custom_test_data_sink custom_sink(filepath);
 
-  namespace cudf_io = cudf::io;
-
   // exercises multiple rowgroups
   srand(31337);
   auto expected = create_random_fixed_table<int>(4, 4 * 1024 * 1024, false);
 
   // write out using the custom sink (which uses device writes)
-  cudf_io::parquet_writer_options args =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{&custom_sink}, *expected);
-  cudf_io::write_parquet(args);
+  cudf::io::parquet_writer_options args =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected);
+  cudf::io::write_parquet(args);
 
-  cudf_io::parquet_reader_options custom_args =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath});
-  auto custom_tbl = cudf_io::read_parquet(custom_args);
+  cudf::io::parquet_reader_options custom_args =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
+  auto custom_tbl = cudf::io::read_parquet(custom_args);
   CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view());
 }
 
@@ -1354,19 +1348,19 @@ TEST_F(ParquetWriterTest, PartitionedWrite)
   auto expected2 =
     cudf::slice(*source, {partition2.start_row, partition2.start_row + partition2.num_rows});
 
-  cudf_io::parquet_writer_options args =
-    cudf_io::parquet_writer_options::builder(
-      cudf_io::sink_info(std::vector<std::string>{filepath1, filepath2}), *source)
+  cudf::io::parquet_writer_options args =
+    cudf::io::parquet_writer_options::builder(
+      cudf::io::sink_info(std::vector<std::string>{filepath1, filepath2}), *source)
       .partitions({partition1, partition2})
-      .compression(cudf_io::compression_type::NONE);
-  cudf_io::write_parquet(args);
+      .compression(cudf::io::compression_type::NONE);
+  cudf::io::write_parquet(args);
 
-  auto result1 = cudf_io::read_parquet(
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath1)));
+  auto result1 = cudf::io::read_parquet(
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath1)));
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected1, result1.tbl->view());
 
-  auto result2 = cudf_io::read_parquet(
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath2)));
+  auto result2 = cudf::io::read_parquet(
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath2)));
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected2, result2.tbl->view());
 }
 
@@ -1385,19 +1379,19 @@ TEST_F(ParquetWriterTest, PartitionedWriteEmptyPartitions)
   auto expected2 =
     cudf::slice(*source, {partition2.start_row, partition2.start_row + partition2.num_rows});
 
-  cudf_io::parquet_writer_options args =
-    cudf_io::parquet_writer_options::builder(
-      cudf_io::sink_info(std::vector<std::string>{filepath1, filepath2}), *source)
+  cudf::io::parquet_writer_options args =
+    cudf::io::parquet_writer_options::builder(
+      cudf::io::sink_info(std::vector<std::string>{filepath1, filepath2}), *source)
       .partitions({partition1, partition2})
-      .compression(cudf_io::compression_type::NONE);
-  cudf_io::write_parquet(args);
+      .compression(cudf::io::compression_type::NONE);
+  cudf::io::write_parquet(args);
 
-  auto result1 = cudf_io::read_parquet(
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath1)));
+  auto result1 = cudf::io::read_parquet(
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath1)));
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected1, result1.tbl->view());
 
-  auto result2 = cudf_io::read_parquet(
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath2)));
+  auto result2 = cudf::io::read_parquet(
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath2)));
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected2, result2.tbl->view());
 }
 
@@ -1416,19 +1410,19 @@ TEST_F(ParquetWriterTest, PartitionedWriteEmptyColumns)
   auto expected2 =
     cudf::slice(*source, {partition2.start_row, partition2.start_row + partition2.num_rows});
 
-  cudf_io::parquet_writer_options args =
-    cudf_io::parquet_writer_options::builder(
-      cudf_io::sink_info(std::vector<std::string>{filepath1, filepath2}), *source)
+  cudf::io::parquet_writer_options args =
+    cudf::io::parquet_writer_options::builder(
+      cudf::io::sink_info(std::vector<std::string>{filepath1, filepath2}), *source)
       .partitions({partition1, partition2})
-      .compression(cudf_io::compression_type::NONE);
-  cudf_io::write_parquet(args);
+      .compression(cudf::io::compression_type::NONE);
+  cudf::io::write_parquet(args);
 
-  auto result1 = cudf_io::read_parquet(
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath1)));
+  auto result1 = cudf::io::read_parquet(
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath1)));
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected1, result1.tbl->view());
 
-  auto result2 = cudf_io::read_parquet(
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath2)));
+  auto result2 = cudf::io::read_parquet(
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath2)));
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected2, result2.tbl->view());
 }
 
@@ -1439,9 +1433,9 @@ std::string create_parquet_file(int num_cols)
   auto const table = create_random_fixed_table<T>(num_cols, 10, true);
   auto const filepath =
     temp_env->get_temp_filepath(typeid(T).name() + std::to_string(num_cols) + ".parquet");
-  cudf_io::parquet_writer_options const out_opts =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, table->view());
-  cudf_io::write_parquet(out_opts);
+  cudf::io::parquet_writer_options const out_opts =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, table->view());
+  cudf::io::write_parquet(out_opts);
   return filepath;
 }
 
@@ -1451,16 +1445,16 @@ TEST_F(ParquetWriterTest, MultipleMismatchedSources)
   {
     auto const float5file = create_parquet_file<float>(5);
     std::vector<std::string> files{int5file, float5file};
-    cudf_io::parquet_reader_options const read_opts =
-      cudf_io::parquet_reader_options::builder(cudf_io::source_info{files});
-    EXPECT_THROW(cudf_io::read_parquet(read_opts), cudf::logic_error);
+    cudf::io::parquet_reader_options const read_opts =
+      cudf::io::parquet_reader_options::builder(cudf::io::source_info{files});
+    EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error);
   }
   {
     auto const int10file = create_parquet_file<int>(10);
     std::vector<std::string> files{int5file, int10file};
-    cudf_io::parquet_reader_options const read_opts =
-      cudf_io::parquet_reader_options::builder(cudf_io::source_info{files});
-    EXPECT_THROW(cudf_io::read_parquet(read_opts), cudf::logic_error);
+    cudf::io::parquet_reader_options const read_opts =
+      cudf::io::parquet_reader_options::builder(cudf::io::source_info{files});
+    EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error);
   }
 }
 
@@ -1473,13 +1467,13 @@ TEST_F(ParquetWriterTest, Slice)
   cudf::table_view tbl{result};
 
   auto filepath = temp_env->get_temp_filepath("Slice.parquet");
-  cudf_io::parquet_writer_options out_opts =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, tbl);
-  cudf_io::write_parquet(out_opts);
+  cudf::io::parquet_writer_options out_opts =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl);
+  cudf::io::write_parquet(out_opts);
 
-  cudf_io::parquet_reader_options in_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath});
-  auto read_table = cudf_io::read_parquet(in_opts);
+  cudf::io::parquet_reader_options in_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
+  auto read_table = cudf::io::read_parquet(in_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUIVALENT(read_table.tbl->view(), tbl);
 }
@@ -1490,13 +1484,13 @@ TEST_F(ParquetChunkedWriterTest, SingleTable)
   auto table1 = create_random_fixed_table<int>(5, 5, true);
 
   auto filepath = temp_env->get_temp_filepath("ChunkedSingle.parquet");
-  cudf_io::chunked_parquet_writer_options args =
-    cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath});
-  cudf_io::parquet_chunked_writer(args).write(*table1);
+  cudf::io::chunked_parquet_writer_options args =
+    cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath});
+  cudf::io::parquet_chunked_writer(args).write(*table1);
 
-  cudf_io::parquet_reader_options read_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_parquet(read_opts);
+  cudf::io::parquet_reader_options read_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_parquet(read_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *table1);
 }
@@ -1510,13 +1504,13 @@ TEST_F(ParquetChunkedWriterTest, SimpleTable)
   auto full_table = cudf::concatenate(std::vector<table_view>({*table1, *table2}));
 
   auto filepath = temp_env->get_temp_filepath("ChunkedSimple.parquet");
-  cudf_io::chunked_parquet_writer_options args =
-    cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath});
-  cudf_io::parquet_chunked_writer(args).write(*table1).write(*table2);
+  cudf::io::chunked_parquet_writer_options args =
+    cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath});
+  cudf::io::parquet_chunked_writer(args).write(*table1).write(*table2);
 
-  cudf_io::parquet_reader_options read_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_parquet(read_opts);
+  cudf::io::parquet_reader_options read_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_parquet(read_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table);
 }
@@ -1530,14 +1524,14 @@ TEST_F(ParquetChunkedWriterTest, LargeTables)
   auto full_table = cudf::concatenate(std::vector<table_view>({*table1, *table2}));
 
   auto filepath = temp_env->get_temp_filepath("ChunkedLarge.parquet");
-  cudf_io::chunked_parquet_writer_options args =
-    cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath});
-  auto md = cudf_io::parquet_chunked_writer(args).write(*table1).write(*table2).close();
+  cudf::io::chunked_parquet_writer_options args =
+    cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath});
+  auto md = cudf::io::parquet_chunked_writer(args).write(*table1).write(*table2).close();
   CUDF_EXPECTS(!md, "The return value should be null.");
 
-  cudf_io::parquet_reader_options read_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_parquet(read_opts);
+  cudf::io::parquet_reader_options read_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_parquet(read_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table);
 }
@@ -1557,18 +1551,18 @@ TEST_F(ParquetChunkedWriterTest, ManyTables)
   auto expected = cudf::concatenate(table_views);
 
   auto filepath = temp_env->get_temp_filepath("ChunkedManyTables.parquet");
-  cudf_io::chunked_parquet_writer_options args =
-    cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath});
-  cudf_io::parquet_chunked_writer writer(args);
+  cudf::io::chunked_parquet_writer_options args =
+    cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath});
+  cudf::io::parquet_chunked_writer writer(args);
   std::for_each(table_views.begin(), table_views.end(), [&writer](table_view const& tbl) {
     writer.write(tbl);
   });
   auto md = writer.close({"dummy/path"});
   CUDF_EXPECTS(md, "The returned metadata should not be null.");
 
-  cudf_io::parquet_reader_options read_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_parquet(read_opts);
+  cudf::io::parquet_reader_options read_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_parquet(read_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected);
 }
@@ -1592,13 +1586,13 @@ TEST_F(ParquetChunkedWriterTest, Strings)
   auto expected = cudf::concatenate(std::vector<table_view>({tbl1, tbl2}));
 
   auto filepath = temp_env->get_temp_filepath("ChunkedStrings.parquet");
-  cudf_io::chunked_parquet_writer_options args =
-    cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath});
-  cudf_io::parquet_chunked_writer(args).write(tbl1).write(tbl2);
+  cudf::io::chunked_parquet_writer_options args =
+    cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath});
+  cudf::io::parquet_chunked_writer(args).write(tbl1).write(tbl2);
 
-  cudf_io::parquet_reader_options read_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_parquet(read_opts);
+  cudf::io::parquet_reader_options read_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_parquet(read_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected);
 }
@@ -1651,13 +1645,13 @@ TEST_F(ParquetChunkedWriterTest, ListColumn)
   auto expected = cudf::concatenate(std::vector<table_view>({tbl0, tbl1}));
 
   auto filepath = temp_env->get_temp_filepath("ChunkedLists.parquet");
-  cudf_io::chunked_parquet_writer_options args =
-    cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath});
-  cudf_io::parquet_chunked_writer(args).write(tbl0).write(tbl1);
+  cudf::io::chunked_parquet_writer_options args =
+    cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath});
+  cudf::io::parquet_chunked_writer(args).write(tbl0).write(tbl1);
 
-  cudf_io::parquet_reader_options read_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_parquet(read_opts);
+  cudf::io::parquet_reader_options read_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_parquet(read_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected);
 }
@@ -1704,7 +1698,7 @@ TEST_F(ParquetChunkedWriterTest, ListOfStruct)
 
   auto full_table = cudf::concatenate(std::vector<table_view>({table_1, table_2}));
 
-  cudf_io::table_input_metadata expected_metadata(table_1);
+  cudf::io::table_input_metadata expected_metadata(table_1);
   expected_metadata.column_metadata[0].set_name("family");
   expected_metadata.column_metadata[0].child(1).set_nullability(false);
   expected_metadata.column_metadata[0].child(1).child(0).set_name("human?");
@@ -1713,14 +1707,14 @@ TEST_F(ParquetChunkedWriterTest, ListOfStruct)
   expected_metadata.column_metadata[0].child(1).child(1).child(1).set_name("age");
 
   auto filepath = temp_env->get_temp_filepath("ChunkedListOfStruct.parquet");
-  cudf_io::chunked_parquet_writer_options args =
-    cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath});
+  cudf::io::chunked_parquet_writer_options args =
+    cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath});
   args.set_metadata(&expected_metadata);
-  cudf_io::parquet_chunked_writer(args).write(table_1).write(table_2);
+  cudf::io::parquet_chunked_writer(args).write(table_1).write(table_2);
 
-  cudf_io::parquet_reader_options read_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_parquet(read_opts);
+  cudf::io::parquet_reader_options read_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_parquet(read_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *full_table);
   cudf::test::expect_metadata_equal(expected_metadata, result.metadata);
@@ -1795,7 +1789,7 @@ TEST_F(ParquetChunkedWriterTest, ListOfStructOfStructOfListOfList)
 
   auto full_table = cudf::concatenate(std::vector<table_view>({table_1, table_2}));
 
-  cudf_io::table_input_metadata expected_metadata(table_1);
+  cudf::io::table_input_metadata expected_metadata(table_1);
   expected_metadata.column_metadata[0].set_name("family");
   expected_metadata.column_metadata[0].child(1).set_nullability(false);
   expected_metadata.column_metadata[0].child(1).child(0).set_name("human?");
@@ -1806,14 +1800,14 @@ TEST_F(ParquetChunkedWriterTest, ListOfStructOfStructOfListOfList)
   expected_metadata.column_metadata[0].child(1).child(1).child(3).set_name("flats");
 
   auto filepath = temp_env->get_temp_filepath("ListOfStructOfStructOfListOfList.parquet");
-  cudf_io::chunked_parquet_writer_options args =
-    cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath});
+  cudf::io::chunked_parquet_writer_options args =
+    cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath});
   args.set_metadata(&expected_metadata);
-  cudf_io::parquet_chunked_writer(args).write(table_1).write(table_2);
+  cudf::io::parquet_chunked_writer(args).write(table_1).write(table_2);
 
-  cudf_io::parquet_reader_options read_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_parquet(read_opts);
+  cudf::io::parquet_reader_options read_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_parquet(read_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *full_table);
   cudf::test::expect_metadata_equal(expected_metadata, result.metadata);
@@ -1831,9 +1825,9 @@ TEST_F(ParquetChunkedWriterTest, MismatchedTypes)
   auto table2 = create_random_fixed_table<float>(4, 4, true);
 
   auto filepath = temp_env->get_temp_filepath("ChunkedMismatchedTypes.parquet");
-  cudf_io::chunked_parquet_writer_options args =
-    cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath});
-  cudf_io::parquet_chunked_writer writer(args);
+  cudf::io::chunked_parquet_writer_options args =
+    cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath});
+  cudf::io::parquet_chunked_writer writer(args);
   writer.write(*table1);
   EXPECT_THROW(writer.write(*table2), cudf::logic_error);
   writer.close();
@@ -1845,9 +1839,9 @@ TEST_F(ParquetChunkedWriterTest, ChunkedWriteAfterClosing)
   auto table = create_random_fixed_table<int>(4, 4, true);
 
   auto filepath = temp_env->get_temp_filepath("ChunkedWriteAfterClosing.parquet");
-  cudf_io::chunked_parquet_writer_options args =
-    cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath});
-  cudf_io::parquet_chunked_writer writer(args);
+  cudf::io::chunked_parquet_writer_options args =
+    cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath});
+  cudf::io::parquet_chunked_writer writer(args);
   writer.write(*table).close();
   EXPECT_THROW(writer.write(*table), cudf::logic_error);
 }
@@ -1858,14 +1852,14 @@ TEST_F(ParquetChunkedWriterTest, ReadingUnclosedFile)
   auto table = create_random_fixed_table<int>(4, 4, true);
 
   auto filepath = temp_env->get_temp_filepath("ReadingUnclosedFile.parquet");
-  cudf_io::chunked_parquet_writer_options args =
-    cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath});
-  cudf_io::parquet_chunked_writer writer(args);
+  cudf::io::chunked_parquet_writer_options args =
+    cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath});
+  cudf::io::parquet_chunked_writer writer(args);
   writer.write(*table);
 
-  cudf_io::parquet_reader_options read_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath});
-  EXPECT_THROW(cudf_io::read_parquet(read_opts), cudf::logic_error);
+  cudf::io::parquet_reader_options read_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
+  EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error);
 }
 
 TEST_F(ParquetChunkedWriterTest, MismatchedStructure)
@@ -1875,9 +1869,9 @@ TEST_F(ParquetChunkedWriterTest, MismatchedStructure)
   auto table2 = create_random_fixed_table<float>(3, 4, true);
 
   auto filepath = temp_env->get_temp_filepath("ChunkedMismatchedStructure.parquet");
-  cudf_io::chunked_parquet_writer_options args =
-    cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath});
-  cudf_io::parquet_chunked_writer writer(args);
+  cudf::io::chunked_parquet_writer_options args =
+    cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath});
+  cudf::io::parquet_chunked_writer writer(args);
   writer.write(*table1);
   EXPECT_THROW(writer.write(*table2), cudf::logic_error);
   writer.close();
@@ -1915,9 +1909,9 @@ TEST_F(ParquetChunkedWriterTest, MismatchedStructureList)
   auto tbl1 = table_view({col01, col11});
 
   auto filepath = temp_env->get_temp_filepath("ChunkedLists.parquet");
-  cudf_io::chunked_parquet_writer_options args =
-    cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath});
-  cudf_io::parquet_chunked_writer writer(args);
+  cudf::io::chunked_parquet_writer_options args =
+    cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath});
+  cudf::io::parquet_chunked_writer writer(args);
   writer.write(tbl0);
   EXPECT_THROW(writer.write(tbl1), cudf::logic_error);
 }
@@ -1931,13 +1925,13 @@ TEST_F(ParquetChunkedWriterTest, DifferentNullability)
   auto full_table = cudf::concatenate(std::vector<table_view>({*table1, *table2}));
 
   auto filepath = temp_env->get_temp_filepath("ChunkedNullable.parquet");
-  cudf_io::chunked_parquet_writer_options args =
-    cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath});
-  cudf_io::parquet_chunked_writer(args).write(*table1).write(*table2);
+  cudf::io::chunked_parquet_writer_options args =
+    cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath});
+  cudf::io::parquet_chunked_writer(args).write(*table1).write(*table2);
 
-  cudf_io::parquet_reader_options read_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_parquet(read_opts);
+  cudf::io::parquet_reader_options read_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_parquet(read_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table);
 }
@@ -1969,7 +1963,7 @@ TEST_F(ParquetChunkedWriterTest, DifferentNullabilityStruct)
 
   auto full_table = cudf::concatenate(std::vector<table_view>({table_1, table_2}));
 
-  cudf_io::table_input_metadata expected_metadata(table_1);
+  cudf::io::table_input_metadata expected_metadata(table_1);
   expected_metadata.column_metadata[0].set_name("being");
   expected_metadata.column_metadata[0].child(0).set_name("human?");
   expected_metadata.column_metadata[0].child(1).set_name("particulars");
@@ -1977,14 +1971,14 @@ TEST_F(ParquetChunkedWriterTest, DifferentNullabilityStruct)
   expected_metadata.column_metadata[0].child(1).child(1).set_name("age");
 
   auto filepath = temp_env->get_temp_filepath("ChunkedNullableStruct.parquet");
-  cudf_io::chunked_parquet_writer_options args =
-    cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath});
+  cudf::io::chunked_parquet_writer_options args =
+    cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath});
   args.set_metadata(&expected_metadata);
-  cudf_io::parquet_chunked_writer(args).write(table_1).write(table_2);
+  cudf::io::parquet_chunked_writer(args).write(table_1).write(table_2);
 
-  cudf_io::parquet_reader_options read_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_parquet(read_opts);
+  cudf::io::parquet_reader_options read_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_parquet(read_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.tbl, *full_table);
   cudf::test::expect_metadata_equal(expected_metadata, result.metadata);
@@ -2000,7 +1994,7 @@ TEST_F(ParquetChunkedWriterTest, ForcedNullability)
 
   auto filepath = temp_env->get_temp_filepath("ChunkedNoNullable.parquet");
 
-  cudf_io::table_input_metadata metadata(*table1);
+  cudf::io::table_input_metadata metadata(*table1);
 
   // In the absence of prescribed per-column nullability in metadata, the writer assumes the worst
   // and considers all columns nullable. However cudf::concatenate will not force nulls in case no
@@ -2010,14 +2004,14 @@ TEST_F(ParquetChunkedWriterTest, ForcedNullability)
     col_meta.set_nullability(false);
   }
 
-  cudf_io::chunked_parquet_writer_options args =
-    cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath})
+  cudf::io::chunked_parquet_writer_options args =
+    cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath})
       .metadata(&metadata);
-  cudf_io::parquet_chunked_writer(args).write(*table1).write(*table2);
+  cudf::io::parquet_chunked_writer(args).write(*table1).write(*table2);
 
-  cudf_io::parquet_reader_options read_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_parquet(read_opts);
+  cudf::io::parquet_reader_options read_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_parquet(read_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table);
 }
@@ -2057,7 +2051,7 @@ TEST_F(ParquetChunkedWriterTest, ForcedNullabilityList)
 
   auto full_table = cudf::concatenate(std::vector<table_view>({table1, table2}));
 
-  cudf_io::table_input_metadata metadata(table1);
+  cudf::io::table_input_metadata metadata(table1);
   metadata.column_metadata[0].set_nullability(true);  // List is nullable at first (root) level
   metadata.column_metadata[0].child(1).set_nullability(
     false);  // non-nullable at second (leaf) level
@@ -2065,14 +2059,14 @@ TEST_F(ParquetChunkedWriterTest, ForcedNullabilityList)
 
   auto filepath = temp_env->get_temp_filepath("ChunkedListNullable.parquet");
 
-  cudf_io::chunked_parquet_writer_options args =
-    cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath})
+  cudf::io::chunked_parquet_writer_options args =
+    cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath})
       .metadata(&metadata);
-  cudf_io::parquet_chunked_writer(args).write(table1).write(table2);
+  cudf::io::parquet_chunked_writer(args).write(table1).write(table2);
 
-  cudf_io::parquet_reader_options read_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_parquet(read_opts);
+  cudf::io::parquet_reader_options read_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_parquet(read_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table);
 }
@@ -2102,7 +2096,7 @@ TEST_F(ParquetChunkedWriterTest, ForcedNullabilityStruct)
 
   auto full_table = cudf::concatenate(std::vector<table_view>({table_1, table_2}));
 
-  cudf_io::table_input_metadata expected_metadata(table_1);
+  cudf::io::table_input_metadata expected_metadata(table_1);
   expected_metadata.column_metadata[0].set_name("being").set_nullability(false);
   expected_metadata.column_metadata[0].child(0).set_name("human?").set_nullability(false);
   expected_metadata.column_metadata[0].child(1).set_name("particulars");
@@ -2110,14 +2104,14 @@ TEST_F(ParquetChunkedWriterTest, ForcedNullabilityStruct)
   expected_metadata.column_metadata[0].child(1).child(1).set_name("age");
 
   auto filepath = temp_env->get_temp_filepath("ChunkedNullableStruct.parquet");
-  cudf_io::chunked_parquet_writer_options args =
-    cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath});
+  cudf::io::chunked_parquet_writer_options args =
+    cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath});
   args.set_metadata(&expected_metadata);
-  cudf_io::parquet_chunked_writer(args).write(table_1).write(table_2);
+  cudf::io::parquet_chunked_writer(args).write(table_1).write(table_2);
 
-  cudf_io::parquet_reader_options read_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_parquet(read_opts);
+  cudf::io::parquet_reader_options read_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_parquet(read_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table);
   cudf::test::expect_metadata_equal(expected_metadata, result.metadata);
@@ -2132,16 +2126,16 @@ TEST_F(ParquetChunkedWriterTest, ReadRowGroups)
   auto full_table = cudf::concatenate(std::vector<table_view>({*table2, *table1, *table2}));
 
   auto filepath = temp_env->get_temp_filepath("ChunkedRowGroups.parquet");
-  cudf_io::chunked_parquet_writer_options args =
-    cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath});
+  cudf::io::chunked_parquet_writer_options args =
+    cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath});
   {
-    cudf_io::parquet_chunked_writer(args).write(*table1).write(*table2);
+    cudf::io::parquet_chunked_writer(args).write(*table1).write(*table2);
   }
 
-  cudf_io::parquet_reader_options read_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::parquet_reader_options read_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath})
       .row_groups({{1, 0, 1}});
-  auto result = cudf_io::read_parquet(read_opts);
+  auto result = cudf::io::read_parquet(read_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table);
 }
@@ -2152,17 +2146,17 @@ TEST_F(ParquetChunkedWriterTest, ReadRowGroupsError)
   auto table1 = create_random_fixed_table<int>(5, 5, true);
 
   auto filepath = temp_env->get_temp_filepath("ChunkedRowGroupsError.parquet");
-  cudf_io::chunked_parquet_writer_options args =
-    cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath});
-  cudf_io::parquet_chunked_writer(args).write(*table1);
+  cudf::io::chunked_parquet_writer_options args =
+    cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath});
+  cudf::io::parquet_chunked_writer(args).write(*table1);
 
-  cudf_io::parquet_reader_options read_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}).row_groups({{0, 1}});
-  EXPECT_THROW(cudf_io::read_parquet(read_opts), cudf::logic_error);
+  cudf::io::parquet_reader_options read_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}).row_groups({{0, 1}});
+  EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error);
   read_opts.set_row_groups({{-1}});
-  EXPECT_THROW(cudf_io::read_parquet(read_opts), cudf::logic_error);
+  EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error);
   read_opts.set_row_groups({{0}, {0}});
-  EXPECT_THROW(cudf_io::read_parquet(read_opts), cudf::logic_error);
+  EXPECT_THROW(cudf::io::read_parquet(read_opts), cudf::logic_error);
 }
 
 TEST_F(ParquetWriterTest, DecimalWrite)
@@ -2182,26 +2176,26 @@ TEST_F(ParquetWriterTest, DecimalWrite)
   auto table = table_view({col0, col1});
 
   auto filepath = temp_env->get_temp_filepath("DecimalWrite.parquet");
-  cudf_io::parquet_writer_options args =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, table);
+  cudf::io::parquet_writer_options args =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, table);
 
-  cudf_io::table_input_metadata expected_metadata(table);
+  cudf::io::table_input_metadata expected_metadata(table);
 
   // verify failure if too small a precision is given
   expected_metadata.column_metadata[0].set_decimal_precision(7);
   expected_metadata.column_metadata[1].set_decimal_precision(1);
   args.set_metadata(&expected_metadata);
-  EXPECT_THROW(cudf_io::write_parquet(args), cudf::logic_error);
+  EXPECT_THROW(cudf::io::write_parquet(args), cudf::logic_error);
 
   // verify success if equal precision is given
   expected_metadata.column_metadata[0].set_decimal_precision(7);
   expected_metadata.column_metadata[1].set_decimal_precision(9);
   args.set_metadata(&expected_metadata);
-  cudf_io::write_parquet(args);
+  cudf::io::write_parquet(args);
 
-  cudf_io::parquet_reader_options read_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_parquet(read_opts);
+  cudf::io::parquet_reader_options read_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_parquet(read_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, table);
 }
@@ -2243,13 +2237,13 @@ TYPED_TEST(ParquetChunkedWriterNumericTypeTest, UnalignedSize)
   auto expected = cudf::concatenate(std::vector<table_view>({tbl1, tbl2}));
 
   auto filepath = temp_env->get_temp_filepath("ChunkedUnalignedSize.parquet");
-  cudf_io::chunked_parquet_writer_options args =
-    cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath});
-  cudf_io::parquet_chunked_writer(args).write(tbl1).write(tbl2);
+  cudf::io::chunked_parquet_writer_options args =
+    cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath});
+  cudf::io::parquet_chunked_writer(args).write(tbl1).write(tbl2);
 
-  cudf_io::parquet_reader_options read_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_parquet(read_opts);
+  cudf::io::parquet_reader_options read_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_parquet(read_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected);
 }
@@ -2291,13 +2285,13 @@ TYPED_TEST(ParquetChunkedWriterNumericTypeTest, UnalignedSize2)
   auto expected = cudf::concatenate(std::vector<table_view>({tbl1, tbl2}));
 
   auto filepath = temp_env->get_temp_filepath("ChunkedUnalignedSize2.parquet");
-  cudf_io::chunked_parquet_writer_options args =
-    cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info{filepath});
-  cudf_io::parquet_chunked_writer(args).write(tbl1).write(tbl2);
+  cudf::io::chunked_parquet_writer_options args =
+    cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info{filepath});
+  cudf::io::parquet_chunked_writer(args).write(tbl1).write(tbl2);
 
-  cudf_io::parquet_reader_options read_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_parquet(read_opts);
+  cudf::io::parquet_reader_options read_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_parquet(read_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected);
 }
@@ -2350,20 +2344,18 @@ TEST_F(ParquetWriterStressTest, LargeTableWeakCompression)
   mm_buf.reserve(4 * 1024 * 1024 * 16);
   custom_test_memmap_sink<false> custom_sink(&mm_buf);
 
-  namespace cudf_io = cudf::io;
-
   // exercises multiple rowgroups
   srand(31337);
   auto expected = create_random_fixed_table<int>(16, 4 * 1024 * 1024, false);
 
   // write out using the custom sink (which uses device writes)
-  cudf_io::parquet_writer_options args =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{&custom_sink}, *expected);
-  cudf_io::write_parquet(args);
+  cudf::io::parquet_writer_options args =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected);
+  cudf::io::write_parquet(args);
 
-  cudf_io::parquet_reader_options custom_args =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{mm_buf.data(), mm_buf.size()});
-  auto custom_tbl = cudf_io::read_parquet(custom_args);
+  cudf::io::parquet_reader_options custom_args =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()});
+  auto custom_tbl = cudf::io::read_parquet(custom_args);
   CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view());
 }
 
@@ -2373,20 +2365,18 @@ TEST_F(ParquetWriterStressTest, LargeTableGoodCompression)
   mm_buf.reserve(4 * 1024 * 1024 * 16);
   custom_test_memmap_sink<false> custom_sink(&mm_buf);
 
-  namespace cudf_io = cudf::io;
-
   // exercises multiple rowgroups
   srand(31337);
   auto expected = create_compressible_fixed_table<int>(16, 4 * 1024 * 1024, 128 * 1024, false);
 
   // write out using the custom sink (which uses device writes)
-  cudf_io::parquet_writer_options args =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{&custom_sink}, *expected);
-  cudf_io::write_parquet(args);
+  cudf::io::parquet_writer_options args =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected);
+  cudf::io::write_parquet(args);
 
-  cudf_io::parquet_reader_options custom_args =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{mm_buf.data(), mm_buf.size()});
-  auto custom_tbl = cudf_io::read_parquet(custom_args);
+  cudf::io::parquet_reader_options custom_args =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()});
+  auto custom_tbl = cudf::io::read_parquet(custom_args);
   CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view());
 }
 
@@ -2396,20 +2386,18 @@ TEST_F(ParquetWriterStressTest, LargeTableWithValids)
   mm_buf.reserve(4 * 1024 * 1024 * 16);
   custom_test_memmap_sink<false> custom_sink(&mm_buf);
 
-  namespace cudf_io = cudf::io;
-
   // exercises multiple rowgroups
   srand(31337);
   auto expected = create_compressible_fixed_table<int>(16, 4 * 1024 * 1024, 6, true);
 
   // write out using the custom sink (which uses device writes)
-  cudf_io::parquet_writer_options args =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{&custom_sink}, *expected);
-  cudf_io::write_parquet(args);
+  cudf::io::parquet_writer_options args =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected);
+  cudf::io::write_parquet(args);
 
-  cudf_io::parquet_reader_options custom_args =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{mm_buf.data(), mm_buf.size()});
-  auto custom_tbl = cudf_io::read_parquet(custom_args);
+  cudf::io::parquet_reader_options custom_args =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()});
+  auto custom_tbl = cudf::io::read_parquet(custom_args);
   CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view());
 }
 
@@ -2419,20 +2407,18 @@ TEST_F(ParquetWriterStressTest, DeviceWriteLargeTableWeakCompression)
   mm_buf.reserve(4 * 1024 * 1024 * 16);
   custom_test_memmap_sink<true> custom_sink(&mm_buf);
 
-  namespace cudf_io = cudf::io;
-
   // exercises multiple rowgroups
   srand(31337);
   auto expected = create_random_fixed_table<int>(16, 4 * 1024 * 1024, false);
 
   // write out using the custom sink (which uses device writes)
-  cudf_io::parquet_writer_options args =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{&custom_sink}, *expected);
-  cudf_io::write_parquet(args);
+  cudf::io::parquet_writer_options args =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected);
+  cudf::io::write_parquet(args);
 
-  cudf_io::parquet_reader_options custom_args =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{mm_buf.data(), mm_buf.size()});
-  auto custom_tbl = cudf_io::read_parquet(custom_args);
+  cudf::io::parquet_reader_options custom_args =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()});
+  auto custom_tbl = cudf::io::read_parquet(custom_args);
   CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view());
 }
 
@@ -2442,20 +2428,18 @@ TEST_F(ParquetWriterStressTest, DeviceWriteLargeTableGoodCompression)
   mm_buf.reserve(4 * 1024 * 1024 * 16);
   custom_test_memmap_sink<true> custom_sink(&mm_buf);
 
-  namespace cudf_io = cudf::io;
-
   // exercises multiple rowgroups
   srand(31337);
   auto expected = create_compressible_fixed_table<int>(16, 4 * 1024 * 1024, 128 * 1024, false);
 
   // write out using the custom sink (which uses device writes)
-  cudf_io::parquet_writer_options args =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{&custom_sink}, *expected);
-  cudf_io::write_parquet(args);
+  cudf::io::parquet_writer_options args =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected);
+  cudf::io::write_parquet(args);
 
-  cudf_io::parquet_reader_options custom_args =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{mm_buf.data(), mm_buf.size()});
-  auto custom_tbl = cudf_io::read_parquet(custom_args);
+  cudf::io::parquet_reader_options custom_args =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()});
+  auto custom_tbl = cudf::io::read_parquet(custom_args);
   CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view());
 }
 
@@ -2465,20 +2449,18 @@ TEST_F(ParquetWriterStressTest, DeviceWriteLargeTableWithValids)
   mm_buf.reserve(4 * 1024 * 1024 * 16);
   custom_test_memmap_sink<true> custom_sink(&mm_buf);
 
-  namespace cudf_io = cudf::io;
-
   // exercises multiple rowgroups
   srand(31337);
   auto expected = create_compressible_fixed_table<int>(16, 4 * 1024 * 1024, 6, true);
 
   // write out using the custom sink (which uses device writes)
-  cudf_io::parquet_writer_options args =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{&custom_sink}, *expected);
-  cudf_io::write_parquet(args);
+  cudf::io::parquet_writer_options args =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&custom_sink}, *expected);
+  cudf::io::write_parquet(args);
 
-  cudf_io::parquet_reader_options custom_args =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{mm_buf.data(), mm_buf.size()});
-  auto custom_tbl = cudf_io::read_parquet(custom_args);
+  cudf::io::parquet_reader_options custom_args =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{mm_buf.data(), mm_buf.size()});
+  auto custom_tbl = cudf::io::read_parquet(custom_args);
   CUDF_TEST_EXPECT_TABLES_EQUAL(custom_tbl.tbl->view(), expected->view());
 }
 
@@ -2491,14 +2473,14 @@ TEST_F(ParquetReaderTest, UserBounds)
     auto expected = create_random_fixed_table<int>(4, 4, false);
 
     auto filepath = temp_env->get_temp_filepath("TooManyRows.parquet");
-    cudf_io::parquet_writer_options args =
-      cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, *expected);
-    cudf_io::write_parquet(args);
+    cudf::io::parquet_writer_options args =
+      cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected);
+    cudf::io::write_parquet(args);
 
     // attempt to read more rows than there actually are
-    cudf_io::parquet_reader_options read_opts =
-      cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}).num_rows(16);
-    auto result = cudf_io::read_parquet(read_opts);
+    cudf::io::parquet_reader_options read_opts =
+      cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}).num_rows(16);
+    auto result = cudf::io::read_parquet(read_opts);
 
     // we should only get back 4 rows
     EXPECT_EQ(result.tbl->view().column(0).size(), 4);
@@ -2511,14 +2493,14 @@ TEST_F(ParquetReaderTest, UserBounds)
     auto expected = create_random_fixed_table<int>(4, 4, false);
 
     auto filepath = temp_env->get_temp_filepath("PastBounds.parquet");
-    cudf_io::parquet_writer_options args =
-      cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, *expected);
-    cudf_io::write_parquet(args);
+    cudf::io::parquet_writer_options args =
+      cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected);
+    cudf::io::write_parquet(args);
 
     // attempt to read more rows than there actually are
-    cudf_io::parquet_reader_options read_opts =
-      cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}).skip_rows(4);
-    auto result = cudf_io::read_parquet(read_opts);
+    cudf::io::parquet_reader_options read_opts =
+      cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}).skip_rows(4);
+    auto result = cudf::io::read_parquet(read_opts);
 
     // we should get empty columns back
     EXPECT_EQ(result.tbl->view().num_columns(), 4);
@@ -2533,14 +2515,14 @@ TEST_F(ParquetReaderTest, UserBounds)
     auto expected = create_random_fixed_table<int>(4, 4, false);
 
     auto filepath = temp_env->get_temp_filepath("ZeroRows.parquet");
-    cudf_io::parquet_writer_options args =
-      cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, *expected);
-    cudf_io::write_parquet(args);
+    cudf::io::parquet_writer_options args =
+      cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected);
+    cudf::io::write_parquet(args);
 
     // attempt to read more rows than there actually are
-    cudf_io::parquet_reader_options read_opts =
-      cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}).num_rows(0);
-    auto result = cudf_io::read_parquet(read_opts);
+    cudf::io::parquet_reader_options read_opts =
+      cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}).num_rows(0);
+    auto result = cudf::io::read_parquet(read_opts);
 
     EXPECT_EQ(result.tbl->view().num_columns(), 4);
     EXPECT_EQ(result.tbl->view().column(0).size(), 0);
@@ -2553,16 +2535,16 @@ TEST_F(ParquetReaderTest, UserBounds)
     auto expected = create_random_fixed_table<int>(4, 4, false);
 
     auto filepath = temp_env->get_temp_filepath("ZeroRowsPastBounds.parquet");
-    cudf_io::parquet_writer_options args =
-      cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, *expected);
-    cudf_io::write_parquet(args);
+    cudf::io::parquet_writer_options args =
+      cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected);
+    cudf::io::write_parquet(args);
 
     // attempt to read more rows than there actually are
-    cudf_io::parquet_reader_options read_opts =
-      cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath})
+    cudf::io::parquet_reader_options read_opts =
+      cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath})
         .skip_rows(4)
         .num_rows(0);
-    auto result = cudf_io::read_parquet(read_opts);
+    auto result = cudf::io::read_parquet(read_opts);
 
     // we should get empty columns back
     EXPECT_EQ(result.tbl->view().num_columns(), 4);
@@ -2578,9 +2560,9 @@ TEST_F(ParquetReaderTest, UserBoundsWithNulls)
   // clang-format on
   cudf::table_view tbl({col});
   auto filepath = temp_env->get_temp_filepath("UserBoundsWithNulls.parquet");
-  cudf_io::parquet_writer_options out_args =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, tbl);
-  cudf_io::write_parquet(out_args);
+  cudf::io::parquet_writer_options out_args =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl);
+  cudf::io::write_parquet(out_args);
 
   // skip_rows / num_rows
   // clang-format off
@@ -2592,11 +2574,11 @@ TEST_F(ParquetReaderTest, UserBoundsWithNulls)
                                            {62, 2}, {63, 1}};
   // clang-format on
   for (auto p : params) {
-    cudf_io::parquet_reader_options read_args =
-      cudf::io::parquet_reader_options::builder(cudf_io::source_info{filepath});
+    cudf::io::parquet_reader_options read_args =
+      cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
     if (p.first >= 0) { read_args.set_skip_rows(p.first); }
     if (p.second >= 0) { read_args.set_num_rows(p.second); }
-    auto result = cudf_io::read_parquet(read_args);
+    auto result = cudf::io::read_parquet(read_args);
 
     p.first  = p.first < 0 ? 0 : p.first;
     p.second = p.second < 0 ? static_cast<cudf::column_view>(col).size() - p.first : p.second;
@@ -2622,9 +2604,9 @@ TEST_F(ParquetReaderTest, UserBoundsWithNullsLarge)
   // this file will have row groups of 1,000,000 each
   cudf::table_view tbl({col});
   auto filepath = temp_env->get_temp_filepath("UserBoundsWithNullsLarge.parquet");
-  cudf_io::parquet_writer_options out_args =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, tbl);
-  cudf_io::write_parquet(out_args);
+  cudf::io::parquet_writer_options out_args =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl);
+  cudf::io::write_parquet(out_args);
 
   // skip_rows / num_rows
   // clang-format off
@@ -2636,11 +2618,11 @@ TEST_F(ParquetReaderTest, UserBoundsWithNullsLarge)
                                            {24001231, 17}, {29000001, 989999}, {29999999, 1} };
   // clang-format on
   for (auto p : params) {
-    cudf_io::parquet_reader_options read_args =
-      cudf::io::parquet_reader_options::builder(cudf_io::source_info{filepath});
+    cudf::io::parquet_reader_options read_args =
+      cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
     if (p.first >= 0) { read_args.set_skip_rows(p.first); }
     if (p.second >= 0) { read_args.set_num_rows(p.second); }
-    auto result = cudf_io::read_parquet(read_args);
+    auto result = cudf::io::read_parquet(read_args);
 
     p.first  = p.first < 0 ? 0 : p.first;
     p.second = p.second < 0 ? static_cast<cudf::column_view>(col).size() - p.first : p.second;
@@ -2660,9 +2642,9 @@ TEST_F(ParquetReaderTest, ListUserBoundsWithNullsLarge)
   // this file will have row groups of 1,000,000 each
   cudf::table_view tbl({col});
   auto filepath = temp_env->get_temp_filepath("ListUserBoundsWithNullsLarge.parquet");
-  cudf_io::parquet_writer_options out_args =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, tbl);
-  cudf_io::write_parquet(out_args);
+  cudf::io::parquet_writer_options out_args =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl);
+  cudf::io::write_parquet(out_args);
 
   // skip_rows / num_rows
   // clang-format off
@@ -2674,11 +2656,11 @@ TEST_F(ParquetReaderTest, ListUserBoundsWithNullsLarge)
                                            {4001231, 17}, {1900000, 989999}, {4999999, 1} };
   // clang-format on
   for (auto p : params) {
-    cudf_io::parquet_reader_options read_args =
-      cudf::io::parquet_reader_options::builder(cudf_io::source_info{filepath});
+    cudf::io::parquet_reader_options read_args =
+      cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
     if (p.first >= 0) { read_args.set_skip_rows(p.first); }
     if (p.second >= 0) { read_args.set_num_rows(p.second); }
-    auto result = cudf_io::read_parquet(read_args);
+    auto result = cudf::io::read_parquet(read_args);
 
     p.first  = p.first < 0 ? 0 : p.first;
     p.second = p.second < 0 ? static_cast<cudf::column_view>(col).size() - p.first : p.second;
@@ -2697,17 +2679,18 @@ TEST_F(ParquetReaderTest, ReorderedColumns)
 
     cudf::table_view tbl{{a, b}};
     auto filepath = temp_env->get_temp_filepath("ReorderedColumns.parquet");
-    cudf_io::table_input_metadata md(tbl);
+    cudf::io::table_input_metadata md(tbl);
     md.column_metadata[0].set_name("a");
     md.column_metadata[1].set_name("b");
-    cudf_io::parquet_writer_options opts =
-      cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, tbl).metadata(&md);
-    cudf_io::write_parquet(opts);
+    cudf::io::parquet_writer_options opts =
+      cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl).metadata(&md);
+    cudf::io::write_parquet(opts);
 
     // read them out of order
-    cudf_io::parquet_reader_options read_opts =
-      cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}).columns({"b", "a"});
-    auto result = cudf_io::read_parquet(read_opts);
+    cudf::io::parquet_reader_options read_opts =
+      cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath})
+        .columns({"b", "a"});
+    auto result = cudf::io::read_parquet(read_opts);
 
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), b);
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), a);
@@ -2719,17 +2702,18 @@ TEST_F(ParquetReaderTest, ReorderedColumns)
 
     cudf::table_view tbl{{a, b}};
     auto filepath = temp_env->get_temp_filepath("ReorderedColumns2.parquet");
-    cudf_io::table_input_metadata md(tbl);
+    cudf::io::table_input_metadata md(tbl);
     md.column_metadata[0].set_name("a");
     md.column_metadata[1].set_name("b");
-    cudf_io::parquet_writer_options opts =
-      cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, tbl).metadata(&md);
-    cudf_io::write_parquet(opts);
+    cudf::io::parquet_writer_options opts =
+      cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl).metadata(&md);
+    cudf::io::write_parquet(opts);
 
     // read them out of order
-    cudf_io::parquet_reader_options read_opts =
-      cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}).columns({"b", "a"});
-    auto result = cudf_io::read_parquet(read_opts);
+    cudf::io::parquet_reader_options read_opts =
+      cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath})
+        .columns({"b", "a"});
+    auto result = cudf::io::read_parquet(read_opts);
 
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), b);
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), a);
@@ -2744,21 +2728,21 @@ TEST_F(ParquetReaderTest, ReorderedColumns)
 
   cudf::table_view tbl{{a, b, c, d}};
   auto filepath = temp_env->get_temp_filepath("ReorderedColumns3.parquet");
-  cudf_io::table_input_metadata md(tbl);
+  cudf::io::table_input_metadata md(tbl);
   md.column_metadata[0].set_name("a");
   md.column_metadata[1].set_name("b");
   md.column_metadata[2].set_name("c");
   md.column_metadata[3].set_name("d");
-  cudf_io::parquet_writer_options opts =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, tbl).metadata(&md);
-  cudf_io::write_parquet(opts);
+  cudf::io::parquet_writer_options opts =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl).metadata(&md);
+  cudf::io::write_parquet(opts);
 
   {
     // read them out of order
-    cudf_io::parquet_reader_options read_opts =
-      cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath})
+    cudf::io::parquet_reader_options read_opts =
+      cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath})
         .columns({"d", "a", "b", "c"});
-    auto result = cudf_io::read_parquet(read_opts);
+    auto result = cudf::io::read_parquet(read_opts);
 
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), d);
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), a);
@@ -2768,10 +2752,10 @@ TEST_F(ParquetReaderTest, ReorderedColumns)
 
   {
     // read them out of order
-    cudf_io::parquet_reader_options read_opts =
-      cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath})
+    cudf::io::parquet_reader_options read_opts =
+      cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath})
         .columns({"c", "d", "a", "b"});
-    auto result = cudf_io::read_parquet(read_opts);
+    auto result = cudf::io::read_parquet(read_opts);
 
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), c);
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), d);
@@ -2781,10 +2765,10 @@ TEST_F(ParquetReaderTest, ReorderedColumns)
 
   {
     // read them out of order
-    cudf_io::parquet_reader_options read_opts =
-      cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath})
+    cudf::io::parquet_reader_options read_opts =
+      cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath})
         .columns({"d", "c", "b", "a"});
-    auto result = cudf_io::read_parquet(read_opts);
+    auto result = cudf::io::read_parquet(read_opts);
 
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), d);
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), c);
@@ -2818,7 +2802,7 @@ TEST_F(ParquetReaderTest, SelectNestedColumn)
 
   auto input = table_view({*struct_2});
 
-  cudf_io::table_input_metadata input_metadata(input);
+  cudf::io::table_input_metadata input_metadata(input);
   input_metadata.column_metadata[0].set_name("being");
   input_metadata.column_metadata[0].child(0).set_name("human?");
   input_metadata.column_metadata[0].child(1).set_name("particulars");
@@ -2826,16 +2810,16 @@ TEST_F(ParquetReaderTest, SelectNestedColumn)
   input_metadata.column_metadata[0].child(1).child(1).set_name("age");
 
   auto filepath = temp_env->get_temp_filepath("SelectNestedColumn.parquet");
-  cudf_io::parquet_writer_options args =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, input)
+  cudf::io::parquet_writer_options args =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, input)
       .metadata(&input_metadata);
-  cudf_io::write_parquet(args);
+  cudf::io::write_parquet(args);
 
   {  // Test selecting a single leaf from the table
-    cudf_io::parquet_reader_options read_args =
-      cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath))
+    cudf::io::parquet_reader_options read_args =
+      cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath))
         .columns({"being.particulars.age"});
-    const auto result = cudf_io::read_parquet(read_args);
+    const auto result = cudf::io::read_parquet(read_args);
 
     auto expect_ages_col = cudf::test::fixed_width_column_wrapper<int32_t>{
       {48, 27, 25, 31, 351, 351}, {1, 1, 1, 1, 1, 0}};
@@ -2844,7 +2828,7 @@ TEST_F(ParquetReaderTest, SelectNestedColumn)
       cudf::test::structs_column_wrapper{{expect_s_1}, {0, 1, 1, 1, 1, 1}}.release();
     auto expected = table_view({*expect_s_2});
 
-    cudf_io::table_input_metadata expected_metadata(expected);
+    cudf::io::table_input_metadata expected_metadata(expected);
     expected_metadata.column_metadata[0].set_name("being");
     expected_metadata.column_metadata[0].child(0).set_name("particulars");
     expected_metadata.column_metadata[0].child(0).child(0).set_name("age");
@@ -2854,10 +2838,10 @@ TEST_F(ParquetReaderTest, SelectNestedColumn)
   }
 
   {  // Test selecting a non-leaf and expecting all hierarchy from that node onwards
-    cudf_io::parquet_reader_options read_args =
-      cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath))
+    cudf::io::parquet_reader_options read_args =
+      cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath))
         .columns({"being.particulars"});
-    const auto result = cudf_io::read_parquet(read_args);
+    const auto result = cudf::io::read_parquet(read_args);
 
     auto expected_weights_col =
       cudf::test::fixed_width_column_wrapper<float>{1.1, 2.4, 5.3, 8.0, 9.6, 6.9};
@@ -2872,7 +2856,7 @@ TEST_F(ParquetReaderTest, SelectNestedColumn)
       cudf::test::structs_column_wrapper{{expected_s_1}, {0, 1, 1, 1, 1, 1}}.release();
     auto expected = table_view({*expect_s_2});
 
-    cudf_io::table_input_metadata expected_metadata(expected);
+    cudf::io::table_input_metadata expected_metadata(expected);
     expected_metadata.column_metadata[0].set_name("being");
     expected_metadata.column_metadata[0].child(0).set_name("particulars");
     expected_metadata.column_metadata[0].child(0).child(0).set_name("weight");
@@ -2883,10 +2867,10 @@ TEST_F(ParquetReaderTest, SelectNestedColumn)
   }
 
   {  // Test selecting struct children out of order
-    cudf_io::parquet_reader_options read_args =
-      cudf_io::parquet_reader_options::builder(cudf_io::source_info(filepath))
+    cudf::io::parquet_reader_options read_args =
+      cudf::io::parquet_reader_options::builder(cudf::io::source_info(filepath))
         .columns({"being.particulars.age", "being.particulars.weight", "being.human?"});
-    const auto result = cudf_io::read_parquet(read_args);
+    const auto result = cudf::io::read_parquet(read_args);
 
     auto expected_weights_col =
       cudf::test::fixed_width_column_wrapper<float>{1.1, 2.4, 5.3, 8.0, 9.6, 6.9};
@@ -2906,7 +2890,7 @@ TEST_F(ParquetReaderTest, SelectNestedColumn)
 
     auto expected = table_view({*expect_s_2});
 
-    cudf_io::table_input_metadata expected_metadata(expected);
+    cudf::io::table_input_metadata expected_metadata(expected);
     expected_metadata.column_metadata[0].set_name("being");
     expected_metadata.column_metadata[0].child(0).set_name("particulars");
     expected_metadata.column_metadata[0].child(0).child(0).set_name("age");
@@ -3086,9 +3070,9 @@ TEST_F(ParquetReaderTest, DecimalRead)
       0x00, 0x00, 0x00, 0xd3, 0x02, 0x00, 0x00, 0x50, 0x41, 0x52, 0x31};
     unsigned int decimals_parquet_len = 2366;
 
-    cudf_io::parquet_reader_options read_opts = cudf_io::parquet_reader_options::builder(
-      cudf_io::source_info{reinterpret_cast<const char*>(decimals_parquet), decimals_parquet_len});
-    auto result = cudf_io::read_parquet(read_opts);
+    cudf::io::parquet_reader_options read_opts = cudf::io::parquet_reader_options::builder(
+      cudf::io::source_info{reinterpret_cast<const char*>(decimals_parquet), decimals_parquet_len});
+    auto result = cudf::io::read_parquet(read_opts);
 
     auto validity =
       cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 50; });
@@ -3134,9 +3118,9 @@ TEST_F(ParquetReaderTest, DecimalRead)
       std::begin(col1_data), std::end(col1_data), validity, numeric::scale_type{-5});
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), col1);
 
-    cudf_io::parquet_reader_options read_strict_opts = read_opts;
+    cudf::io::parquet_reader_options read_strict_opts = read_opts;
     read_strict_opts.set_columns({"dec7p4", "dec14p5"});
-    EXPECT_NO_THROW(cudf_io::read_parquet(read_strict_opts));
+    EXPECT_NO_THROW(cudf::io::read_parquet(read_strict_opts));
   }
   {
     // dec7p3: Decimal(precision=7, scale=3) backed by FIXED_LENGTH_BYTE_ARRAY(length = 4)
@@ -3229,10 +3213,10 @@ TEST_F(ParquetReaderTest, DecimalRead)
 
     unsigned int parquet_len = 1226;
 
-    cudf_io::parquet_reader_options read_opts =
-      cudf_io::parquet_reader_options::builder(cudf_io::source_info{
+    cudf::io::parquet_reader_options read_opts =
+      cudf::io::parquet_reader_options::builder(cudf::io::source_info{
         reinterpret_cast<const char*>(fixed_len_bytes_decimal_parquet), parquet_len});
-    auto result = cudf_io::read_parquet(read_opts);
+    auto result = cudf::io::read_parquet(read_opts);
     EXPECT_EQ(result.tbl->view().num_columns(), 3);
 
     auto validity_c0    = cudf::test::iterators::nulls_at({19});
@@ -3324,18 +3308,18 @@ TEST_F(ParquetReaderTest, EmptyOutput)
   table_view expected({c0, c1, c2, *c3, c4});
 
   // set precision on the decimal column
-  cudf_io::table_input_metadata expected_metadata(expected);
+  cudf::io::table_input_metadata expected_metadata(expected);
   expected_metadata.column_metadata[2].set_decimal_precision(1);
 
   auto filepath = temp_env->get_temp_filepath("EmptyOutput.parquet");
-  cudf_io::parquet_writer_options out_args =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected);
+  cudf::io::parquet_writer_options out_args =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected);
   out_args.set_metadata(&expected_metadata);
-  cudf_io::write_parquet(out_args);
+  cudf::io::write_parquet(out_args);
 
-  cudf_io::parquet_reader_options read_args =
-    cudf::io::parquet_reader_options::builder(cudf_io::source_info{filepath});
-  auto result = cudf_io::read_parquet(read_args);
+  cudf::io::parquet_reader_options read_args =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_parquet(read_args);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view());
 }
@@ -3345,33 +3329,33 @@ TEST_F(ParquetWriterTest, RowGroupSizeInvalid)
   const auto unused_table = std::make_unique<table>();
   std::vector<char> out_buffer;
 
-  EXPECT_THROW(
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info(&out_buffer), unused_table->view())
-      .row_group_size_rows(4999),
-    cudf::logic_error);
-  EXPECT_THROW(
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info(&out_buffer), unused_table->view())
-      .max_page_size_rows(4999),
-    cudf::logic_error);
-  EXPECT_THROW(
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info(&out_buffer), unused_table->view())
-      .row_group_size_bytes(3 << 10),
-    cudf::logic_error);
-  EXPECT_THROW(
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info(&out_buffer), unused_table->view())
-      .max_page_size_bytes(3 << 10),
-    cudf::logic_error);
-
-  EXPECT_THROW(cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info(&out_buffer))
+  EXPECT_THROW(cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer),
+                                                         unused_table->view())
                  .row_group_size_rows(4999),
                cudf::logic_error);
-  EXPECT_THROW(cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info(&out_buffer))
+  EXPECT_THROW(cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer),
+                                                         unused_table->view())
                  .max_page_size_rows(4999),
                cudf::logic_error);
-  EXPECT_THROW(cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info(&out_buffer))
+  EXPECT_THROW(cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer),
+                                                         unused_table->view())
                  .row_group_size_bytes(3 << 10),
                cudf::logic_error);
-  EXPECT_THROW(cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info(&out_buffer))
+  EXPECT_THROW(cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer),
+                                                         unused_table->view())
+                 .max_page_size_bytes(3 << 10),
+               cudf::logic_error);
+
+  EXPECT_THROW(cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer))
+                 .row_group_size_rows(4999),
+               cudf::logic_error);
+  EXPECT_THROW(cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer))
+                 .max_page_size_rows(4999),
+               cudf::logic_error);
+  EXPECT_THROW(cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer))
+                 .row_group_size_bytes(3 << 10),
+               cudf::logic_error);
+  EXPECT_THROW(cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer))
                  .max_page_size_bytes(3 << 10),
                cudf::logic_error);
 }
@@ -3381,13 +3365,13 @@ TEST_F(ParquetWriterTest, RowGroupPageSizeMatch)
   const auto unused_table = std::make_unique<table>();
   std::vector<char> out_buffer;
 
-  auto options =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info(&out_buffer), unused_table->view())
-      .row_group_size_bytes(128 * 1024)
-      .max_page_size_bytes(512 * 1024)
-      .row_group_size_rows(10000)
-      .max_page_size_rows(20000)
-      .build();
+  auto options = cudf::io::parquet_writer_options::builder(cudf::io::sink_info(&out_buffer),
+                                                           unused_table->view())
+                   .row_group_size_bytes(128 * 1024)
+                   .max_page_size_bytes(512 * 1024)
+                   .row_group_size_rows(10000)
+                   .max_page_size_rows(20000)
+                   .build();
   EXPECT_EQ(options.get_row_group_size_bytes(), options.get_max_page_size_bytes());
   EXPECT_EQ(options.get_row_group_size_rows(), options.get_max_page_size_rows());
 }
@@ -3396,7 +3380,7 @@ TEST_F(ParquetChunkedWriterTest, RowGroupPageSizeMatch)
 {
   std::vector<char> out_buffer;
 
-  auto options = cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info(&out_buffer))
+  auto options = cudf::io::chunked_parquet_writer_options::builder(cudf::io::sink_info(&out_buffer))
                    .row_group_size_bytes(128 * 1024)
                    .max_page_size_bytes(512 * 1024)
                    .row_group_size_rows(10000)
@@ -3420,7 +3404,7 @@ TEST_F(ParquetWriterTest, EmptyList)
   cudf::io::write_parquet(cudf::io::parquet_writer_options_builder(cudf::io::sink_info(filepath),
                                                                    cudf::table_view({*L0})));
 
-  auto result = cudf_io::read_parquet(
+  auto result = cudf::io::read_parquet(
     cudf::io::parquet_reader_options_builder(cudf::io::source_info(filepath)));
 
   using lcw     = cudf::test::lists_column_wrapper<int64_t>;
@@ -3447,7 +3431,7 @@ TEST_F(ParquetWriterTest, DeepEmptyList)
   cudf::io::write_parquet(cudf::io::parquet_writer_options_builder(cudf::io::sink_info(filepath),
                                                                    cudf::table_view({*L0})));
 
-  auto result = cudf_io::read_parquet(
+  auto result = cudf::io::read_parquet(
     cudf::io::parquet_reader_options_builder(cudf::io::source_info(filepath)));
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), *L0);
@@ -3472,7 +3456,7 @@ TEST_F(ParquetWriterTest, EmptyListWithStruct)
   auto filepath = temp_env->get_temp_filepath("EmptyListWithStruct.parquet");
   cudf::io::write_parquet(cudf::io::parquet_writer_options_builder(cudf::io::sink_info(filepath),
                                                                    cudf::table_view({*L0})));
-  auto result = cudf_io::read_parquet(
+  auto result = cudf::io::read_parquet(
     cudf::io::parquet_reader_options_builder(cudf::io::source_info(filepath)));
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), *L0);
@@ -4362,15 +4346,15 @@ TEST_F(ParquetReaderTest, EmptyColumnsParam)
   auto const expected = create_random_fixed_table<int>(2, 4, false);
 
   std::vector<char> out_buffer;
-  cudf_io::parquet_writer_options args =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{&out_buffer}, *expected);
-  cudf_io::write_parquet(args);
+  cudf::io::parquet_writer_options args =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{&out_buffer}, *expected);
+  cudf::io::write_parquet(args);
 
-  cudf_io::parquet_reader_options read_opts =
-    cudf_io::parquet_reader_options::builder(
-      cudf_io::source_info{out_buffer.data(), out_buffer.size()})
+  cudf::io::parquet_reader_options read_opts =
+    cudf::io::parquet_reader_options::builder(
+      cudf::io::source_info{out_buffer.data(), out_buffer.size()})
       .columns({});
-  auto const result = cudf_io::read_parquet(read_opts);
+  auto const result = cudf::io::read_parquet(read_opts);
 
   EXPECT_EQ(result.tbl->num_columns(), 0);
   EXPECT_EQ(result.tbl->num_rows(), 0);
@@ -4401,7 +4385,7 @@ TEST_F(ParquetReaderTest, BinaryAsStrings)
     {'F', 'u', 'n', 'd', 'a', 'y'}};
 
   auto output = table_view{{int_col, string_col, float_col, string_col, list_int_col}};
-  cudf_io::table_input_metadata output_metadata(output);
+  cudf::io::table_input_metadata output_metadata(output);
   output_metadata.column_metadata[0].set_name("col_other");
   output_metadata.column_metadata[1].set_name("col_string");
   output_metadata.column_metadata[2].set_name("col_float");
@@ -4409,37 +4393,38 @@ TEST_F(ParquetReaderTest, BinaryAsStrings)
   output_metadata.column_metadata[4].set_name("col_binary").set_output_as_binary(true);
 
   auto filepath = temp_env->get_temp_filepath("BinaryReadStrings.parquet");
-  cudf_io::parquet_writer_options out_opts =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, output)
+  cudf::io::parquet_writer_options out_opts =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, output)
       .metadata(&output_metadata);
-  cudf_io::write_parquet(out_opts);
+  cudf::io::write_parquet(out_opts);
 
   auto expected_string = table_view{{int_col, string_col, float_col, string_col, string_col}};
   auto expected_mixed  = table_view{{int_col, string_col, float_col, list_int_col, list_int_col}};
 
-  cudf_io::parquet_reader_options in_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::parquet_reader_options in_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath})
       .set_column_schema({{}, {}, {}, {}, {}});
-  auto result = cudf_io::read_parquet(in_opts);
+  auto result = cudf::io::read_parquet(in_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected_string, result.tbl->view());
 
-  cudf_io::parquet_reader_options default_in_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath});
-  result = cudf_io::read_parquet(default_in_opts);
+  cudf::io::parquet_reader_options default_in_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
+  result = cudf::io::read_parquet(default_in_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected_string, result.tbl->view());
 
-  std::vector<cudf_io::reader_column_schema> md{
+  std::vector<cudf::io::reader_column_schema> md{
     {},
     {},
     {},
-    cudf_io::reader_column_schema().set_convert_binary_to_strings(false),
-    cudf_io::reader_column_schema().set_convert_binary_to_strings(false)};
+    cudf::io::reader_column_schema().set_convert_binary_to_strings(false),
+    cudf::io::reader_column_schema().set_convert_binary_to_strings(false)};
 
-  cudf_io::parquet_reader_options mixed_in_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}).set_column_schema(md);
-  result = cudf_io::read_parquet(mixed_in_opts);
+  cudf::io::parquet_reader_options mixed_in_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath})
+      .set_column_schema(md);
+  result = cudf::io::read_parquet(mixed_in_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected_mixed, result.tbl->view());
 }
@@ -4478,32 +4463,33 @@ TEST_F(ParquetReaderTest, NestedByteArray)
     {{'M', 'o', 'n', 'd', 'a', 'y'}, {'F', 'r', 'i', 'd', 'a', 'y'}}};
 
   auto const expected = table_view{{int_col, float_col, list_list_int_col}};
-  cudf_io::table_input_metadata output_metadata(expected);
+  cudf::io::table_input_metadata output_metadata(expected);
   output_metadata.column_metadata[0].set_name("col_other");
   output_metadata.column_metadata[1].set_name("col_float");
   output_metadata.column_metadata[2].set_name("col_binary").child(1).set_output_as_binary(true);
 
   auto filepath = temp_env->get_temp_filepath("NestedByteArray.parquet");
-  cudf_io::parquet_writer_options out_opts =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected)
+  cudf::io::parquet_writer_options out_opts =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected)
       .metadata(&output_metadata);
-  cudf_io::write_parquet(out_opts);
+  cudf::io::write_parquet(out_opts);
 
-  auto source = cudf_io::datasource::create(filepath);
-  cudf_io::parquet::FileMetaData fmd;
+  auto source = cudf::io::datasource::create(filepath);
+  cudf::io::parquet::FileMetaData fmd;
 
   read_footer(source, &fmd);
   EXPECT_EQ(fmd.schema[5].type, cudf::io::parquet::Type::BYTE_ARRAY);
 
-  std::vector<cudf_io::reader_column_schema> md{
+  std::vector<cudf::io::reader_column_schema> md{
     {},
     {},
-    cudf_io::reader_column_schema().add_child(
-      cudf_io::reader_column_schema().set_convert_binary_to_strings(false))};
+    cudf::io::reader_column_schema().add_child(
+      cudf::io::reader_column_schema().set_convert_binary_to_strings(false))};
 
-  cudf_io::parquet_reader_options in_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}).set_column_schema(md);
-  auto result = cudf_io::read_parquet(in_opts);
+  cudf::io::parquet_reader_options in_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath})
+      .set_column_schema(md);
+  auto result = cudf::io::read_parquet(in_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view());
 }
@@ -4524,23 +4510,23 @@ TEST_F(ParquetWriterTest, ByteArrayStats)
     {0xfe, 0xfe, 0xfe}, {0xfe, 0xfe, 0xfe}, {0xfe, 0xfe, 0xfe}};
 
   auto expected = table_view{{list_int_col0, list_int_col1}};
-  cudf_io::table_input_metadata output_metadata(expected);
+  cudf::io::table_input_metadata output_metadata(expected);
   output_metadata.column_metadata[0].set_name("col_binary0").set_output_as_binary(true);
   output_metadata.column_metadata[1].set_name("col_binary1").set_output_as_binary(true);
 
   auto filepath = temp_env->get_temp_filepath("ByteArrayStats.parquet");
-  cudf_io::parquet_writer_options out_opts =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected)
+  cudf::io::parquet_writer_options out_opts =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected)
       .metadata(&output_metadata);
-  cudf_io::write_parquet(out_opts);
+  cudf::io::write_parquet(out_opts);
 
-  cudf_io::parquet_reader_options in_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath})
+  cudf::io::parquet_reader_options in_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath})
       .set_column_schema({{}, {}});
-  auto result = cudf_io::read_parquet(in_opts);
+  auto result = cudf::io::read_parquet(in_opts);
 
-  auto source = cudf_io::datasource::create(filepath);
-  cudf_io::parquet::FileMetaData fmd;
+  auto source = cudf::io::datasource::create(filepath);
+  cudf::io::parquet::FileMetaData fmd;
 
   read_footer(source, &fmd);
 
@@ -4570,7 +4556,7 @@ TEST_F(ParquetReaderTest, StructByteArray)
 
   auto const expected = table_view{{struct_col}};
   EXPECT_EQ(1, expected.num_columns());
-  cudf_io::table_input_metadata output_metadata(expected);
+  cudf::io::table_input_metadata output_metadata(expected);
   output_metadata.column_metadata[0]
     .set_name("struct_binary")
     .child(0)
@@ -4578,17 +4564,18 @@ TEST_F(ParquetReaderTest, StructByteArray)
     .set_output_as_binary(true);
 
   auto filepath = temp_env->get_temp_filepath("StructByteArray.parquet");
-  cudf_io::parquet_writer_options out_opts =
-    cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, expected)
+  cudf::io::parquet_writer_options out_opts =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected)
       .metadata(&output_metadata);
-  cudf_io::write_parquet(out_opts);
+  cudf::io::write_parquet(out_opts);
 
-  std::vector<cudf_io::reader_column_schema> md{cudf_io::reader_column_schema().add_child(
-    cudf_io::reader_column_schema().set_convert_binary_to_strings(false))};
+  std::vector<cudf::io::reader_column_schema> md{cudf::io::reader_column_schema().add_child(
+    cudf::io::reader_column_schema().set_convert_binary_to_strings(false))};
 
-  cudf_io::parquet_reader_options in_opts =
-    cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}).set_column_schema(md);
-  auto result = cudf_io::read_parquet(in_opts);
+  cudf::io::parquet_reader_options in_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath})
+      .set_column_schema(md);
+  auto result = cudf::io::read_parquet(in_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view());
 }

From ba0febe308a8c097474b3316387dc8051fa1bc64 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 3 Oct 2022 17:29:42 -0700
Subject: [PATCH 005/202] Test/remove thrust vector usage (#11813)

This PR removes usage of `thrust::device_vector` from almost all of our tests. Since the construction of a device vector is not stream-ordered, we should be using `rmm::device_uvector` instead wherever possible. There is one remaining use of `thrust::device_vector`, but that is in a test explicitly verifying that `device_vector` can convert implicitly to a `device_span` so it's worth keeping that there.

I am working on automated tooling to detect any usage of stream 0 in tests as part of a push to prioritize stream-safety in libcudf, and this PR is a prerequisite to adding such tooling to our CI pipeline since at that point any test using stream 0 would fail. Since there is at least one test where I anticipate stream 0 will always be used (the one described above), I should be able to add specific tests to an allowlist as needed. It's an open question whether the added complexity required by the changes in this PR is a worthwhile tradeoff to be able to programmatically detect stream 0 usage. If reviewers feel that the additional complexity is too high, we can revert some (or all) of these changes and I can just plan for allowing stream 0 usage in all of the necessary tests. This PR demonstrates how we would go about removing it if we choose to do so, though.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Vukasin Milovanovic (https://github.com/vuule)
  - Tobias Ribizel (https://github.com/upsj)
  - Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/11813
---
 cpp/include/cudf_test/tdigest_utilities.cuh |  2 +-
 cpp/tests/join/conditional_join_tests.cu    | 40 ++++++++++++++++-----
 cpp/tests/quantiles/tdigest_utilities.cu    | 33 +++++++++++++----
 cpp/tests/utilities_tests/span_tests.cu     | 12 +++----
 4 files changed, 64 insertions(+), 23 deletions(-)

diff --git a/cpp/include/cudf_test/tdigest_utilities.cuh b/cpp/include/cudf_test/tdigest_utilities.cuh
index 6f206a789fd..1a75016d78c 100644
--- a/cpp/include/cudf_test/tdigest_utilities.cuh
+++ b/cpp/include/cudf_test/tdigest_utilities.cuh
@@ -118,7 +118,7 @@ void tdigest_minmax_compare(cudf::tdigest::tdigest_column_view const& tdv,
   // verify min/max
   thrust::host_vector<device_span<T const>> h_spans;
   h_spans.push_back({input_values.begin<T>(), static_cast<size_t>(input_values.size())});
-  thrust::device_vector<device_span<T const>> spans(h_spans);
+  auto spans = cudf::detail::make_device_uvector_async(h_spans, cudf::default_stream_value);
 
   auto expected_min = cudf::make_fixed_width_column(
     data_type{type_id::FLOAT64}, spans.size(), mask_state::UNALLOCATED);
diff --git a/cpp/tests/join/conditional_join_tests.cu b/cpp/tests/join/conditional_join_tests.cu
index bc2a96b5adf..f8dfc972191 100644
--- a/cpp/tests/join/conditional_join_tests.cu
+++ b/cpp/tests/join/conditional_join_tests.cu
@@ -26,10 +26,8 @@
 
 #include <rmm/exec_policy.hpp>
 
-#include <thrust/device_vector.h>
 #include <thrust/equal.h>
-#include <thrust/execution_policy.h>
-#include <thrust/pair.h>
+#include <thrust/iterator/counting_iterator.h>
 #include <thrust/sort.h>
 #include <thrust/transform.h>
 
@@ -127,6 +125,30 @@ gen_random_nullable_repeated_columns(unsigned int N = 10000, unsigned int num_re
                    std::pair(std::move(right), std::move(right_nulls)));
 }
 
+// `rmm::device_uvector<T>` requires that T be trivially copyable. `thrust::pair` does
+// not satisfy this requirement because it defines nontrivial copy/move
+// constructors. Therefore, we need a simple, trivially copyable pair-like
+// object. `index_pair` is a minimal implementation suitable for use in the
+// tests in this file.
+struct index_pair {
+  cudf::size_type first{};
+  cudf::size_type second{};
+  __device__ index_pair(){};
+  __device__ index_pair(cudf::size_type const& first, cudf::size_type const& second)
+    : first(first), second(second){};
+};
+
+__device__ inline bool operator<(const index_pair& lhs, const index_pair& rhs)
+{
+  if (lhs.first > rhs.first) return false;
+  return (lhs.first < rhs.first) || (lhs.second < rhs.second);
+}
+
+__device__ inline bool operator==(const index_pair& lhs, const index_pair& rhs)
+{
+  return lhs.first == rhs.first && lhs.second == rhs.second;
+}
+
 }  // namespace
 
 /**
@@ -253,10 +275,10 @@ struct ConditionalJoinPairReturnTest : public ConditionalJoinTest<T> {
    */
   void _compare_to_hash_join(PairJoinReturn const& result, PairJoinReturn const& reference)
   {
-    thrust::device_vector<thrust::pair<cudf::size_type, cudf::size_type>> result_pairs(
-      result.first->size());
-    thrust::device_vector<thrust::pair<cudf::size_type, cudf::size_type>> reference_pairs(
-      reference.first->size());
+    auto result_pairs =
+      rmm::device_uvector<index_pair>(result.first->size(), cudf::default_stream_value);
+    auto reference_pairs =
+      rmm::device_uvector<index_pair>(reference.first->size(), cudf::default_stream_value);
 
     thrust::transform(rmm::exec_policy(cudf::default_stream_value),
                       result.first->begin(),
@@ -264,7 +286,7 @@ struct ConditionalJoinPairReturnTest : public ConditionalJoinTest<T> {
                       result.second->begin(),
                       result_pairs.begin(),
                       [] __device__(cudf::size_type first, cudf::size_type second) {
-                        return thrust::make_pair(first, second);
+                        return index_pair{first, second};
                       });
     thrust::transform(rmm::exec_policy(cudf::default_stream_value),
                       reference.first->begin(),
@@ -272,7 +294,7 @@ struct ConditionalJoinPairReturnTest : public ConditionalJoinTest<T> {
                       reference.second->begin(),
                       reference_pairs.begin(),
                       [] __device__(cudf::size_type first, cudf::size_type second) {
-                        return thrust::make_pair(first, second);
+                        return index_pair{first, second};
                       });
 
     thrust::sort(
diff --git a/cpp/tests/quantiles/tdigest_utilities.cu b/cpp/tests/quantiles/tdigest_utilities.cu
index 63ccd85bd6d..3cf2f2eb4ef 100644
--- a/cpp/tests/quantiles/tdigest_utilities.cu
+++ b/cpp/tests/quantiles/tdigest_utilities.cu
@@ -51,13 +51,34 @@ void tdigest_sample_compare(cudf::tdigest::tdigest_column_view const& tdv,
   auto sampled_result_weight = cudf::make_fixed_width_column(
     data_type{type_id::FLOAT64}, h_expected.size(), mask_state::UNALLOCATED);
 
-  rmm::device_vector<expected_value> expected(h_expected.begin(), h_expected.end());
+  auto h_expected_src    = std::vector<size_type>(h_expected.size());
+  auto h_expected_mean   = std::vector<double>(h_expected.size());
+  auto h_expected_weight = std::vector<double>(h_expected.size());
+
+  {
+    auto iter = thrust::make_counting_iterator(0);
+    std::for_each_n(iter, h_expected.size(), [&](size_type const index) {
+      h_expected_src[index]    = thrust::get<0>(h_expected[index]);
+      h_expected_mean[index]   = thrust::get<1>(h_expected[index]);
+      h_expected_weight[index] = thrust::get<2>(h_expected[index]);
+    });
+  }
+
+  auto d_expected_src =
+    cudf::detail::make_device_uvector_async(h_expected_src, cudf::default_stream_value);
+  auto d_expected_mean =
+    cudf::detail::make_device_uvector_async(h_expected_mean, cudf::default_stream_value);
+  auto d_expected_weight =
+    cudf::detail::make_device_uvector_async(h_expected_weight, cudf::default_stream_value);
+
   auto iter = thrust::make_counting_iterator(0);
   thrust::for_each(
     rmm::exec_policy(cudf::default_stream_value),
     iter,
-    iter + expected.size(),
-    [expected            = expected.data().get(),
+    iter + h_expected.size(),
+    [expected_src_in     = d_expected_src.data(),
+     expected_mean_in    = d_expected_mean.data(),
+     expected_weight_in  = d_expected_weight.data(),
      expected_mean       = expected_mean->mutable_view().begin<double>(),
      expected_weight     = expected_weight->mutable_view().begin<double>(),
      result_mean         = result_mean.begin<double>(),
@@ -65,9 +86,9 @@ void tdigest_sample_compare(cudf::tdigest::tdigest_column_view const& tdv,
      sampled_result_mean = sampled_result_mean->mutable_view().begin<double>(),
      sampled_result_weight =
        sampled_result_weight->mutable_view().begin<double>()] __device__(size_type index) {
-      expected_mean[index]         = thrust::get<1>(expected[index]);
-      expected_weight[index]       = thrust::get<2>(expected[index]);
-      auto const src_index         = thrust::get<0>(expected[index]);
+      expected_mean[index]         = expected_mean_in[index];
+      expected_weight[index]       = expected_weight_in[index];
+      auto const src_index         = expected_src_in[index];
       sampled_result_mean[index]   = result_mean[src_index];
       sampled_result_weight[index] = result_weight[src_index];
     });
diff --git a/cpp/tests/utilities_tests/span_tests.cu b/cpp/tests/utilities_tests/span_tests.cu
index fc4104c765b..cccef4b6284 100644
--- a/cpp/tests/utilities_tests/span_tests.cu
+++ b/cpp/tests/utilities_tests/span_tests.cu
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/utilities/default_stream.hpp>
 #include <cudf/utilities/span.hpp>
 #include <io/utilities/hostdevice_vector.hpp>
@@ -234,17 +235,14 @@ __global__ void simple_device_kernel(device_span<bool> result) { result[0] = tru
 
 TEST(SpanTest, CanUseDeviceSpan)
 {
-  rmm::device_vector<bool> d_message = std::vector<bool>({false});
+  auto d_message =
+    cudf::detail::make_zeroed_device_uvector_async<bool>(1, cudf::default_stream_value);
 
-  auto d_span = device_span<bool>(d_message.data().get(), d_message.size());
+  auto d_span = device_span<bool>(d_message.data(), d_message.size());
 
   simple_device_kernel<<<1, 1, 0, cudf::default_stream_value.value()>>>(d_span);
 
-  cudaDeviceSynchronize();
-
-  thrust::host_vector<bool> h_message = d_message;
-
-  ASSERT_TRUE(h_message[0]);
+  ASSERT_TRUE(d_message.element(0, cudf::default_stream_value));
 }
 
 class MdSpanTest : public cudf::test::BaseFixture {

From 5e42c2d80ce7850e4f03b7f703b206a12927d797 Mon Sep 17 00:00:00 2001
From: jakirkham <jakirkham@gmail.com>
Date: Tue, 4 Oct 2022 06:06:16 -0700
Subject: [PATCH 006/202] Use conda-forge's `pyorc` (#11855)

This PR switches the `pyorc` install from a `pip` wheel to a `conda` package.

xref: https://github.com/rapidsai/cudf/pull/7085#discussion_r553446553

Authors:
  - https://github.com/jakirkham

Approvers:
  - Jordan Jacobelli (https://github.com/Ethyling)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/11855
---
 conda/environments/cudf_dev_cuda11.5.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/conda/environments/cudf_dev_cuda11.5.yml b/conda/environments/cudf_dev_cuda11.5.yml
index d161804ce7e..d7a762e00f9 100644
--- a/conda/environments/cudf_dev_cuda11.5.yml
+++ b/conda/environments/cudf_dev_cuda11.5.yml
@@ -70,6 +70,7 @@ dependencies:
   - cachetools
   - transformers<=4.10.3
   - pydata-sphinx-theme
+  - pyorc
   - librdkafka=1.7.0
   - python-confluent-kafka=1.7.0
   - moto>=3.1.6
@@ -79,9 +80,6 @@ dependencies:
   - s3fs>=2022.3.0
   - werkzeug<2.2.0 # Temporary transient dependency pinning to avoid URL-LIB3 + moto timeouts
   - pytorch<1.12.0
-  - pip:
-      - git+https://github.com/python-streamz/streamz.git@master
-      - pyorc
   - cubinlinker  # [linux64]
   - gcc_linux-64=9.* # [linux64]
   - sysroot_linux-64==2.17 # [linux64]
@@ -90,3 +88,5 @@ dependencies:
   # - gcc_linux-aarch64=9.* # [aarch64]
   # - sysroot_linux-aarch64==2.17 # [aarch64]
   # - nvcc_linux-aarch64=11.5 # [aarch64]
+  - pip:
+      - git+https://github.com/python-streamz/streamz.git@master

From 7d173c9d144a64c5e1a0467d2a5eb4181854f25e Mon Sep 17 00:00:00 2001
From: Peixin <pxli@nyu.edu>
Date: Tue, 4 Oct 2022 21:24:15 +0800
Subject: [PATCH 007/202] Update cudf JNI version to 22.12.0-SNAPSHOT (#11764)

Update JNI version to 22.12.0-SNAPSHOT

Authors:
  - Peixin (https://github.com/pxLi)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Robert (Bobby) Evans (https://github.com/revans2)

URL: https://github.com/rapidsai/cudf/pull/11764
---
 java/ci/README.md | 4 ++--
 java/pom.xml      | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/java/ci/README.md b/java/ci/README.md
index d74c7b41157..538e18c37c5 100644
--- a/java/ci/README.md
+++ b/java/ci/README.md
@@ -34,7 +34,7 @@ nvidia-docker run -it cudf-build:11.5.0-devel-centos7 bash
 You can download the cuDF repo in the docker container or you can mount it into the container.
 Here I choose to download again in the container.
 ```bash
-git clone --recursive https://github.com/rapidsai/cudf.git -b branch-22.10
+git clone --recursive https://github.com/rapidsai/cudf.git -b branch-22.12
 ```
 
 ### Build cuDF jar with devtoolset
@@ -47,5 +47,5 @@ scl enable devtoolset-9 "java/ci/build-in-docker.sh"
 
 ### The output
 
-You can find the cuDF jar in java/target/ like cudf-22.10.0-SNAPSHOT-cuda11.jar.
+You can find the cuDF jar in java/target/ like cudf-22.12.0-SNAPSHOT-cuda11.jar.
 
diff --git a/java/pom.xml b/java/pom.xml
index f2bb3def459..10d5c7ec360 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -21,7 +21,7 @@
 
     <groupId>ai.rapids</groupId>
     <artifactId>cudf</artifactId>
-    <version>22.10.0-SNAPSHOT</version>
+    <version>22.12.0-SNAPSHOT</version>
 
     <name>cudfjni</name>
     <description>

From 0fb4d7621de51e58fb63c30c73d35cd01a116ef4 Mon Sep 17 00:00:00 2001
From: Gregory Kimball <gregory.kimball@sunpowercorp.com>
Date: Tue, 4 Oct 2022 10:02:36 -0700
Subject: [PATCH 008/202] Remove unused includes for table/row_operators
 (#11857)

After reviewing usages of the "legacy" row operators, several of the includes are no longer needed.

Authors:
  - Gregory Kimball (https://github.com/GregoryKimball)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/11857
---
 cpp/src/groupby/sort/group_single_pass_reduction_util.cuh | 1 -
 cpp/src/partitioning/round_robin.cu                       | 1 -
 cpp/src/search/search_ordered.cu                          | 1 -
 3 files changed, 3 deletions(-)

diff --git a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh
index 93d5e6c032c..58ee06fcfef 100644
--- a/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh
+++ b/cpp/src/groupby/sort/group_single_pass_reduction_util.cuh
@@ -25,7 +25,6 @@
 #include <cudf/detail/iterator.cuh>
 #include <cudf/detail/utilities/element_argminmax.cuh>
 #include <cudf/detail/valid_if.cuh>
-#include <cudf/table/row_operators.cuh>
 #include <cudf/types.hpp>
 #include <cudf/utilities/span.hpp>
 
diff --git a/cpp/src/partitioning/round_robin.cu b/cpp/src/partitioning/round_robin.cu
index d455df3e890..85bd31a20ea 100644
--- a/cpp/src/partitioning/round_robin.cu
+++ b/cpp/src/partitioning/round_robin.cu
@@ -20,7 +20,6 @@
 #include <cudf/detail/utilities/cuda.cuh>
 #include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/null_mask.hpp>
-#include <cudf/table/row_operators.cuh>
 #include <cudf/table/table.hpp>
 #include <cudf/table/table_device_view.cuh>
 #include <cudf/types.hpp>
diff --git a/cpp/src/search/search_ordered.cu b/cpp/src/search/search_ordered.cu
index 8d3b0f97726..754a17dc6d8 100644
--- a/cpp/src/search/search_ordered.cu
+++ b/cpp/src/search/search_ordered.cu
@@ -19,7 +19,6 @@
 #include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/dictionary/detail/update_keys.hpp>
 #include <cudf/table/experimental/row_operators.cuh>
-#include <cudf/table/row_operators.cuh>
 #include <cudf/table/table_device_view.cuh>
 #include <cudf/table/table_view.hpp>
 #include <cudf/utilities/default_stream.hpp>

From 001aede876f58a2c14b30176dcdd981d1c121769 Mon Sep 17 00:00:00 2001
From: "Robert (Bobby) Evans" <bobby@apache.org>
Date: Wed, 5 Oct 2022 14:32:14 -0500
Subject: [PATCH 009/202] JNI Avoid NPE for reading host binary data (#11865)

This avoids a potential null pointer exception when trying to read byte data from an empty column: the read from the data buffer is now skipped when the requested size is zero.

Authors:
  - Robert (Bobby) Evans (https://github.com/revans2)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/11865
---
 java/src/main/java/ai/rapids/cudf/HostColumnVectorCore.java | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/java/src/main/java/ai/rapids/cudf/HostColumnVectorCore.java b/java/src/main/java/ai/rapids/cudf/HostColumnVectorCore.java
index 8b1a9a63131..95d209c0984 100644
--- a/java/src/main/java/ai/rapids/cudf/HostColumnVectorCore.java
+++ b/java/src/main/java/ai/rapids/cudf/HostColumnVectorCore.java
@@ -399,7 +399,9 @@ public byte[] getBytesFromList(long rowIndex) {
     int size = end - start;
 
     byte[] result = new byte[size];
-    listData.offHeap.data.getBytes(result, 0, start, size);
+    if (size > 0) {
+      listData.offHeap.data.getBytes(result, 0, start, size);
+    }
     return result;
   }
 

From 6d1854381b895d7dfd11540a763a4068cb556c66 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Wed, 5 Oct 2022 15:06:37 -0500
Subject: [PATCH 010/202] Unpin `dask` and `distributed` for development
 (#11859)

This PR relaxes the pinnings of `dask` and `distributed` for `22.12` development.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Joseph (https://github.com/jolorunyomi)
  - https://github.com/jakirkham

URL: https://github.com/rapidsai/cudf/pull/11859
---
 ci/benchmark/build.sh                    | 2 +-
 ci/cpu/build.sh                          | 2 +-
 ci/gpu/build.sh                          | 2 +-
 conda/environments/cudf_dev_cuda11.5.yml | 4 ++--
 conda/recipes/custreamz/meta.yaml        | 4 ++--
 conda/recipes/dask-cudf/meta.yaml        | 8 ++++----
 python/dask_cudf/setup.py                | 4 ++--
 7 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/ci/benchmark/build.sh b/ci/benchmark/build.sh
index ffa48797fe3..a8bc33e00bc 100755
--- a/ci/benchmark/build.sh
+++ b/ci/benchmark/build.sh
@@ -37,7 +37,7 @@ export GBENCH_BENCHMARKS_DIR="$WORKSPACE/cpp/build/gbenchmarks/"
 export LIBCUDF_KERNEL_CACHE_PATH="$HOME/.jitify-cache"
 
 # Dask & Distributed option to install main(nightly) or `conda-forge` packages.
-export INSTALL_DASK_MAIN=0
+export INSTALL_DASK_MAIN=1
 
 # Dask version to install when `INSTALL_DASK_MAIN=0`
 export DASK_STABLE_VERSION="2022.9.2"
diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh
index 560de6db187..400a1ba4764 100755
--- a/ci/cpu/build.sh
+++ b/ci/cpu/build.sh
@@ -28,7 +28,7 @@ export CONDA_BLD_DIR="$WORKSPACE/.conda-bld"
 
 # Whether to keep `dask/label/dev` channel in the env. If INSTALL_DASK_MAIN=0,
 # `dask/label/dev` channel is removed.
-export INSTALL_DASK_MAIN=0
+export INSTALL_DASK_MAIN=1
 
 # Switch to project root; also root of repo checkout
 cd "$WORKSPACE"
diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh
index 7eebd2bb91d..afcc80a6803 100755
--- a/ci/gpu/build.sh
+++ b/ci/gpu/build.sh
@@ -32,7 +32,7 @@ export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'`
 unset GIT_DESCRIBE_TAG
 
 # Dask & Distributed option to install main(nightly) or `conda-forge` packages.
-export INSTALL_DASK_MAIN=0
+export INSTALL_DASK_MAIN=1
 
 # Dask version to install when `INSTALL_DASK_MAIN=0`
 export DASK_STABLE_VERSION="2022.9.2"
diff --git a/conda/environments/cudf_dev_cuda11.5.yml b/conda/environments/cudf_dev_cuda11.5.yml
index d7a762e00f9..142d3c7d9cb 100644
--- a/conda/environments/cudf_dev_cuda11.5.yml
+++ b/conda/environments/cudf_dev_cuda11.5.yml
@@ -49,8 +49,8 @@ dependencies:
   - pydocstyle=6.1.1
   - typing_extensions
   - pre-commit
-  - dask==2022.9.2
-  - distributed==2022.9.2
+  - dask>=2022.9.2
+  - distributed>=2022.9.2
   - streamz
   - arrow-cpp=9
   - dlpack>=0.5,<0.6.0a0
diff --git a/conda/recipes/custreamz/meta.yaml b/conda/recipes/custreamz/meta.yaml
index 596e5fde940..989f8855533 100644
--- a/conda/recipes/custreamz/meta.yaml
+++ b/conda/recipes/custreamz/meta.yaml
@@ -29,8 +29,8 @@ requirements:
     - python
     - streamz
     - cudf ={{ version }}
-    - dask==2022.9.2
-    - distributed==2022.9.2
+    - dask>=2022.9.2
+    - distributed>=2022.9.2
     - python-confluent-kafka >=1.7.0,<1.8.0a0
     - cudf_kafka ={{ version }}
 
diff --git a/conda/recipes/dask-cudf/meta.yaml b/conda/recipes/dask-cudf/meta.yaml
index 2d95151018b..1c718c0e995 100644
--- a/conda/recipes/dask-cudf/meta.yaml
+++ b/conda/recipes/dask-cudf/meta.yaml
@@ -24,14 +24,14 @@ requirements:
   host:
     - python
     - cudf ={{ version }}
-    - dask==2022.9.2
-    - distributed==2022.9.2
+    - dask>=2022.9.2
+    - distributed>=2022.9.2
     - cudatoolkit ={{ cuda_version }}
   run:
     - python
     - cudf ={{ version }}
-    - dask==2022.9.2
-    - distributed==2022.9.2
+    - dask>=2022.9.2
+    - distributed>=2022.9.2
     - {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }}
 
 test:                                   # [linux64]
diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py
index 54b8f69c6d9..4fa2af89b9d 100644
--- a/python/dask_cudf/setup.py
+++ b/python/dask_cudf/setup.py
@@ -9,8 +9,8 @@
 
 install_requires = [
     "cudf",
-    "dask==2022.9.2",
-    "distributed==2022.9.2",
+    "dask>=2022.9.2",
+    "distributed>=2022.9.2",
     "fsspec>=0.6.0",
     "numpy",
     "pandas>=1.0,<1.6.0dev0",

From 45254745b4d91588fc6575d649d010110a5e7ad3 Mon Sep 17 00:00:00 2001
From: nvdbaranec <56695930+nvdbaranec@users.noreply.github.com>
Date: Wed, 5 Oct 2022 16:30:05 -0500
Subject: [PATCH 011/202] Parquet reader: bug fix for a num_rows/skip_rows
 corner case, w/optimization for nested preprocessing (#11752)

Fixes an issue where using user bounds with Parquet files containing both nested and non-nested types could result in incorrect row counts for the non-nested columns. Originally reported by @etseidl.

The fix also implements a long-desired optimization: when running the preprocess step for nested types, pages belonging to non-nested hierarchies are now ignored. This can result in significant speedups for files containing only a few nested columns.

<s>The tests added for this PR seem to tease a bug in the parquet writer into happening (https://github.com/rapidsai/cudf/issues/11748) so I will leave this as a draft until that issue is resolved.</s>

Authors:
  - https://github.com/nvdbaranec

Approvers:
  - Yunsong Wang (https://github.com/PointKernel)
  - Nghia Truong (https://github.com/ttnghia)
  - Mike Wilson (https://github.com/hyperbolic2346)

URL: https://github.com/rapidsai/cudf/pull/11752
---
 cpp/src/io/parquet/page_data.cu    | 78 ++++++++++++-------------
 cpp/src/io/parquet/reader_impl.cu  | 59 ++++++++++---------
 cpp/src/io/parquet/reader_impl.hpp |  6 +-
 cpp/tests/io/parquet_test.cpp      | 92 +++++++++++++++++++++++++++++-
 4 files changed, 162 insertions(+), 73 deletions(-)

diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu
index 531733a7df7..a5f6d737637 100644
--- a/cpp/src/io/parquet/page_data.cu
+++ b/cpp/src/io/parquet/page_data.cu
@@ -1175,7 +1175,8 @@ static __device__ void gpuUpdateValidityOffsetsAndRowIndices(int32_t target_inpu
                                                              int t)
 {
   // max nesting depth of the column
-  int const max_depth = s->col.max_nesting_depth;
+  int const max_depth       = s->col.max_nesting_depth;
+  bool const has_repetition = s->col.max_level[level_type::REPETITION] > 0;
   // how many (input) values we've processed in the page so far
   int input_value_count = s->input_value_count;
   // how many rows we've processed in the page so far
@@ -1235,7 +1236,7 @@ static __device__ void gpuUpdateValidityOffsetsAndRowIndices(int32_t target_inpu
       uint32_t const warp_valid_mask =
         // for flat schemas, a simple ballot_sync gives us the correct count and bit positions
         // because every value in the input matches to a value in the output
-        max_depth == 1
+        !has_repetition
           ? ballot(is_valid)
           :
           // for nested schemas, it's more complicated.  This warp will visit 32 incoming values,
@@ -1284,11 +1285,12 @@ static __device__ void gpuUpdateValidityOffsetsAndRowIndices(int32_t target_inpu
       // the correct position to start reading. since we are about to write the validity vector here
       // we need to adjust our computed mask to take into account the write row bounds.
       int const in_write_row_bounds =
-        max_depth == 1
+        !has_repetition
           ? thread_row_index >= s->first_row && thread_row_index < (s->first_row + s->num_rows)
           : in_row_bounds;
       int const first_thread_in_write_range =
-        max_depth == 1 ? __ffs(ballot(in_write_row_bounds)) - 1 : 0;
+        !has_repetition ? __ffs(ballot(in_write_row_bounds)) - 1 : 0;
+
       // # of bits to of the validity mask to write out
       int const warp_valid_mask_bit_count =
         first_thread_in_write_range < 0 ? 0 : warp_value_count - first_thread_in_write_range;
@@ -1384,7 +1386,6 @@ static __device__ void gpuUpdatePageSizes(page_state_s* s,
 {
   // max nesting depth of the column
   int max_depth = s->col.max_nesting_depth;
-  // bool has_repetition = s->col.max_level[level_type::REPETITION] > 0 ? true : false;
   // how many input level values we've processed in the page so far
   int input_value_count = s->input_value_count;
   // how many leaf values we've processed in the page so far
@@ -1479,6 +1480,11 @@ __global__ void __launch_bounds__(block_size)
   int t                 = threadIdx.x;
   PageInfo* pp          = &pages[page_idx];
 
+  // we only need to preprocess hierarchies with repetition in them (ie, hierarchies
+  // containing lists anywhere within).
+  bool const has_repetition = chunks[pp->chunk_idx].max_level[level_type::REPETITION] > 0;
+  if (!has_repetition) { return; }
+
   if (!setupLocalPageInfo(s, pp, chunks, trim_pass ? min_row : 0, trim_pass ? num_rows : INT_MAX)) {
     return;
   }
@@ -1504,8 +1510,6 @@ __global__ void __launch_bounds__(block_size)
   }
   __syncthreads();
 
-  bool has_repetition = s->col.max_level[level_type::REPETITION] > 0;
-
   // optimization : it might be useful to have a version of gpuDecodeStream that could go wider than
   // 1 warp.  Currently it only uses 1 warp so that it can overlap work with the value decoding step
   // when in the actual value decoding kernel. However, during this preprocess step we have no such
@@ -1516,16 +1520,13 @@ __global__ void __launch_bounds__(block_size)
     while (!s->error && s->input_value_count < s->num_input_values) {
       // decode repetition and definition levels. these will attempt to decode at
       // least up to the target, but may decode a few more.
-      if (has_repetition) {
-        gpuDecodeStream(s->rep, s, target_input_count, t, level_type::REPETITION);
-      }
+      gpuDecodeStream(s->rep, s, target_input_count, t, level_type::REPETITION);
       gpuDecodeStream(s->def, s, target_input_count, t, level_type::DEFINITION);
       __syncwarp();
 
       // we may have decoded different amounts from each stream, so only process what we've been
-      int actual_input_count = has_repetition ? min(s->lvl_count[level_type::REPETITION],
-                                                    s->lvl_count[level_type::DEFINITION])
-                                              : s->lvl_count[level_type::DEFINITION];
+      int actual_input_count =
+        min(s->lvl_count[level_type::REPETITION], s->lvl_count[level_type::DEFINITION]);
 
       // process what we got back
       gpuUpdatePageSizes(s, actual_input_count, t, trim_pass);
@@ -1573,6 +1574,8 @@ __global__ void __launch_bounds__(block_size) gpuDecodePageData(
       ((s->col.data_type & 7) == BOOLEAN || (s->col.data_type & 7) == BYTE_ARRAY) ? 64 : 32;
   }
 
+  bool const has_repetition = s->col.max_level[level_type::REPETITION] > 0;
+
   // skipped_leaf_values will always be 0 for flat hierarchies.
   uint32_t skipped_leaf_values = s->page.skipped_leaf_values;
   while (!s->error && (s->input_value_count < s->num_input_values || s->src_pos < s->nz_count)) {
@@ -1625,7 +1628,7 @@ __global__ void __launch_bounds__(block_size) gpuDecodePageData(
       // - so we will end up ignoring the first two input rows, and input rows 2..n will
       //   get written to the output starting at position 0.
       //
-      if (s->col.max_nesting_depth == 1) { dst_pos -= s->first_row; }
+      if (!has_repetition) { dst_pos -= s->first_row; }
 
       // target_pos will always be properly bounded by num_rows, but dst_pos may be negative (values
       // before first_row) in the flat hierarchy case.
@@ -1765,6 +1768,8 @@ void PreprocessColumnData(hostdevice_vector<PageInfo>& pages,
 
   // computes:
   // PageInfo::chunk_row for all pages
+  // Note: this is doing some redundant work for pages in flat hierarchies.  chunk_row has already
+  // been computed during header decoding. the overall amount of work here is very small though.
   auto key_input = thrust::make_transform_iterator(
     pages.device_ptr(), [] __device__(PageInfo const& page) { return page.chunk_idx; });
   auto page_input = thrust::make_transform_iterator(
@@ -1840,26 +1845,14 @@ void PreprocessColumnData(hostdevice_vector<PageInfo>& pages,
           return page.nesting[l_idx].size;
         });
 
-      // compute column size.
+      // if this buffer is part of a list hierarchy, we need to determine it's
+      // final size and allocate it here.
+      //
       // for struct columns, higher levels of the output columns are shared between input
       // columns. so don't compute any given level more than once.
-      if (out_buf.size == 0) {
+      if ((out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) && out_buf.size == 0) {
         int size = thrust::reduce(rmm::exec_policy(stream), size_input, size_input + pages.size());
 
-        // Handle a specific corner case.  It is possible to construct a parquet file such that
-        // a column within a row group contains more rows than the row group itself. This may be
-        // invalid, but we have seen instances of this in the wild, including how they were created
-        // using the apache parquet tools.  Normally, the trim pass would handle this case quietly,
-        // but if we are not running the trim pass (which is most of the time) we need to cap the
-        // number of rows we will allocate/read from the file with the amount specified in the
-        // associated row group. This only applies to columns that are not children of lists as
-        // those may have an arbitrary number of rows in them.
-        if (!uses_custom_row_bounds &&
-            !(out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) &&
-            size > static_cast<size_type>(num_rows)) {
-          size = static_cast<size_type>(num_rows);
-        }
-
         // if this is a list column add 1 for non-leaf levels for the terminating offset
         if (out_buf.type.id() == type_id::LIST && l_idx < max_depth) { size++; }
 
@@ -1867,16 +1860,21 @@ void PreprocessColumnData(hostdevice_vector<PageInfo>& pages,
         out_buf.create(size, stream, mr);
       }
 
-      // compute per-page start offset
-      thrust::exclusive_scan_by_key(rmm::exec_policy(stream),
-                                    page_keys.begin(),
-                                    page_keys.end(),
-                                    size_input,
-                                    start_offset_output_iterator{pages.device_ptr(),
-                                                                 page_index.begin(),
-                                                                 0,
-                                                                 static_cast<int>(src_col_schema),
-                                                                 static_cast<int>(l_idx)});
+      // for nested hierarchies, compute per-page start offset.
+      // it would be better/safer to be checking (schema.max_repetition_level > 0) here, but there's
+      // no easy way to get at that info here. we'd have to move this function into reader_impl.cu
+      if ((out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) ||
+          out_buf.type.id() == type_id::LIST) {
+        thrust::exclusive_scan_by_key(rmm::exec_policy(stream),
+                                      page_keys.begin(),
+                                      page_keys.end(),
+                                      size_input,
+                                      start_offset_output_iterator{pages.device_ptr(),
+                                                                   page_index.begin(),
+                                                                   0,
+                                                                   static_cast<int>(src_col_schema),
+                                                                   static_cast<int>(l_idx)});
+      }
     }
   }
 
diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu
index 59bef6f5600..07869189089 100644
--- a/cpp/src/io/parquet/reader_impl.cu
+++ b/cpp/src/io/parquet/reader_impl.cu
@@ -1353,26 +1353,39 @@ void reader::impl::preprocess_columns(hostdevice_vector<gpu::ColumnChunkDesc>& c
                                       hostdevice_vector<gpu::PageInfo>& pages,
                                       size_t min_row,
                                       size_t total_rows,
-                                      bool uses_custom_row_bounds,
-                                      bool has_lists)
+                                      bool uses_custom_row_bounds)
 {
-  // TODO : we should be selectively preprocessing only columns that have
-  // lists in them instead of doing them all if even one contains lists.
-
-  // if there are no lists, simply allocate every allocate every output
-  // column to be of size num_rows
-  if (!has_lists) {
-    std::function<void(std::vector<column_buffer>&)> create_columns =
-      [&](std::vector<column_buffer>& cols) {
-        for (size_t idx = 0; idx < cols.size(); idx++) {
-          auto& col = cols[idx];
-          col.create(total_rows, _stream, _mr);
-          create_columns(col.children);
-        }
-      };
-    create_columns(_output_columns);
-  } else {
-    // preprocess per-nesting level sizes by page
+  // iterate over all input columns and allocate any associated output
+  // buffers if they are not part of a list hierarchy. mark down
+  // if we have any list columns that need further processing.
+  bool has_lists = false;
+  for (size_t idx = 0; idx < _input_columns.size(); idx++) {
+    auto const& input_col  = _input_columns[idx];
+    size_t const max_depth = input_col.nesting_depth();
+
+    auto* cols = &_output_columns;
+    for (size_t l_idx = 0; l_idx < max_depth; l_idx++) {
+      auto& out_buf = (*cols)[input_col.nesting[l_idx]];
+      cols          = &out_buf.children;
+
+      // if this has a list parent, we will have to do further work in gpu::PreprocessColumnData
+      // to know how big this buffer actually is.
+      if (out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) {
+        has_lists = true;
+      }
+      // if we haven't already processed this column because it is part of a struct hierarchy
+      else if (out_buf.size == 0) {
+        // add 1 for the offset if this is a list column
+        out_buf.create(
+          out_buf.type.id() == type_id::LIST && l_idx < max_depth ? total_rows + 1 : total_rows,
+          _stream,
+          _mr);
+      }
+    }
+  }
+
+  // if we have columns containing lists, further preprocessing is necessary.
+  if (has_lists) {
     gpu::PreprocessColumnData(pages,
                               chunks,
                               _input_columns,
@@ -1636,9 +1649,6 @@ table_with_metadata reader::impl::read(size_type skip_rows,
     // Keep track of column chunk file offsets
     std::vector<size_t> column_chunk_offsets(num_chunks);
 
-    // if there are lists present, we need to preprocess
-    bool has_lists = false;
-
     // Initialize column chunk information
     size_t total_decompressed_size = 0;
     auto remaining_rows            = num_rows;
@@ -1657,9 +1667,6 @@ table_with_metadata reader::impl::read(size_type skip_rows,
         auto& col_meta = _metadata->get_column_metadata(rg.index, rg.source_index, col.schema_idx);
         auto& schema   = _metadata->get_schema(col.schema_idx);
 
-        // this column contains repetition levels and will require a preprocess
-        if (schema.max_repetition_level > 0) { has_lists = true; }
-
         auto [type_width, clock_rate, converted_type] =
           conversion_info(to_type_id(schema, _strings_to_categorical, _timestamp_type.id()),
                           _timestamp_type.id(),
@@ -1755,7 +1762,7 @@ table_with_metadata reader::impl::read(size_type skip_rows,
       //
       // - for nested schemas, output buffer offset values per-page, per nesting-level for the
       // purposes of decoding.
-      preprocess_columns(chunks, pages, skip_rows, num_rows, uses_custom_row_bounds, has_lists);
+      preprocess_columns(chunks, pages, skip_rows, num_rows, uses_custom_row_bounds);
 
       // decoding of column data itself
       decode_page_data(chunks, pages, page_nesting_info, skip_rows, num_rows);
diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp
index e1f275bb8e8..6c3e05b4264 100644
--- a/cpp/src/io/parquet/reader_impl.hpp
+++ b/cpp/src/io/parquet/reader_impl.hpp
@@ -148,7 +148,7 @@ class reader::impl {
                              hostdevice_vector<gpu::PageNestingInfo>& page_nesting_info);
 
   /**
-   * @brief Preprocess column information for nested schemas.
+   * @brief Preprocess column information and allocate output buffers.
    *
    * There are several pieces of information we can't compute directly from row counts in
    * the parquet headers when dealing with nested schemas.
@@ -163,15 +163,13 @@ class reader::impl {
    * @param total_rows Maximum number of rows to read
    * @param uses_custom_row_bounds Whether or not num_rows and min_rows represents user-specific
    * bounds
-   * @param has_lists Whether or not this data contains lists and requires
    * a preprocess.
    */
   void preprocess_columns(hostdevice_vector<gpu::ColumnChunkDesc>& chunks,
                           hostdevice_vector<gpu::PageInfo>& pages,
                           size_t min_row,
                           size_t total_rows,
-                          bool uses_custom_row_bounds,
-                          bool has_lists);
+                          bool uses_custom_row_bounds);
 
   /**
    * @brief Converts the page data and outputs to columns.
diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp
index 8a98efabcb5..134eff54144 100644
--- a/cpp/tests/io/parquet_test.cpp
+++ b/cpp/tests/io/parquet_test.cpp
@@ -112,7 +112,7 @@ std::unique_ptr<cudf::table> create_compressible_fixed_table(cudf::size_type num
 // this function replicates the "list_gen" function in
 // python/cudf/cudf/tests/test_parquet.py
 template <typename T>
-std::unique_ptr<cudf::column> make_parquet_list_col(
+std::unique_ptr<cudf::column> make_parquet_list_list_col(
   int skip_rows, int num_rows, int lists_per_row, int list_size, bool include_validity)
 {
   auto valids =
@@ -2212,8 +2212,8 @@ TYPED_TEST(ParquetChunkedWriterNumericTypeTest, UnalignedSize)
 
   bool mask[] = {false, true, true, true, true, true, true, true, true, true, true,
                  true,  true, true, true, true, true, true, true, true, true, true,
-                 true,  true, true, true, true, true, true, true, true};
 
+                 true,  true, true, true, true, true, true, true, true};
   T c1a[num_els];
   std::fill(c1a, c1a + num_els, static_cast<T>(5));
   T c1b[num_els];
@@ -2589,6 +2589,92 @@ TEST_F(ParquetReaderTest, UserBoundsWithNulls)
   }
 }
 
+TEST_F(ParquetReaderTest, UserBoundsWithNullsMixedTypes)
+{
+  constexpr int num_rows = 32 * 1024;
+
+  std::mt19937 gen(6542);
+  std::bernoulli_distribution bn(0.7f);
+  auto valids =
+    cudf::detail::make_counting_transform_iterator(0, [&](int index) { return bn(gen); });
+  auto values = thrust::make_counting_iterator(0);
+
+  // int64
+  cudf::test::fixed_width_column_wrapper<int64_t> c0(values, values + num_rows, valids);
+
+  // list<float>
+  constexpr int floats_per_row = 4;
+  auto c1_offset_iter          = cudf::detail::make_counting_transform_iterator(
+    0, [floats_per_row](cudf::size_type idx) { return idx * floats_per_row; });
+  cudf::test::fixed_width_column_wrapper<cudf::offset_type> c1_offsets(
+    c1_offset_iter, c1_offset_iter + num_rows + 1);
+  cudf::test::fixed_width_column_wrapper<float> c1_floats(
+    values, values + (num_rows * floats_per_row), valids);
+  auto _c1 = cudf::make_lists_column(num_rows,
+                                     c1_offsets.release(),
+                                     c1_floats.release(),
+                                     cudf::UNKNOWN_NULL_COUNT,
+                                     cudf::test::detail::make_null_mask(valids, valids + num_rows));
+  auto c1  = cudf::purge_nonempty_nulls(static_cast<cudf::lists_column_view>(*_c1));
+
+  // list<list<int>>
+  auto c2 = make_parquet_list_list_col<int>(0, num_rows, 5, 8, true);
+
+  // struct<list<string>, int, float>
+  std::vector<std::string> strings{
+    "abc", "x", "bananas", "gpu", "minty", "backspace", "", "cayenne", "turbine", "soft"};
+  std::uniform_int_distribution<int> uni(0, strings.size() - 1);
+  auto string_iter = cudf::detail::make_counting_transform_iterator(
+    0, [&](cudf::size_type idx) { return strings[uni(gen)]; });
+  constexpr int string_per_row  = 3;
+  constexpr int num_string_rows = num_rows * string_per_row;
+  cudf::test::strings_column_wrapper string_col{string_iter, string_iter + num_string_rows};
+  auto offset_iter = cudf::detail::make_counting_transform_iterator(
+    0, [string_per_row](cudf::size_type idx) { return idx * string_per_row; });
+  cudf::test::fixed_width_column_wrapper<cudf::offset_type> offsets(offset_iter,
+                                                                    offset_iter + num_rows + 1);
+  auto _c3_list =
+    cudf::make_lists_column(num_rows,
+                            offsets.release(),
+                            string_col.release(),
+                            cudf::UNKNOWN_NULL_COUNT,
+                            cudf::test::detail::make_null_mask(valids, valids + num_rows));
+  auto c3_list = cudf::purge_nonempty_nulls(static_cast<cudf::lists_column_view>(*_c3_list));
+  cudf::test::fixed_width_column_wrapper<int> c3_ints(values, values + num_rows, valids);
+  cudf::test::fixed_width_column_wrapper<float> c3_floats(values, values + num_rows, valids);
+  std::vector<std::unique_ptr<cudf::column>> c3_children;
+  c3_children.push_back(std::move(c3_list));
+  c3_children.push_back(c3_ints.release());
+  c3_children.push_back(c3_floats.release());
+  cudf::test::structs_column_wrapper _c3(std::move(c3_children));
+  auto c3 = cudf::purge_nonempty_nulls(static_cast<cudf::structs_column_view>(_c3));
+
+  // write it out
+  cudf::table_view tbl({c0, *c1, *c2, *c3});
+  auto filepath = temp_env->get_temp_filepath("UserBoundsWithNullsMixedTypes.parquet");
+  cudf::io::parquet_writer_options out_args =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, tbl);
+  cudf::io::write_parquet(out_args);
+
+  // read it back
+  std::vector<std::pair<int, int>> params{
+    {-1, -1}, {0, num_rows}, {1, num_rows - 1}, {num_rows - 1, 1}, {517, 22000}};
+  for (auto p : params) {
+    cudf::io::parquet_reader_options read_args =
+      cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
+    if (p.first >= 0) { read_args.set_skip_rows(p.first); }
+    if (p.second >= 0) { read_args.set_num_rows(p.second); }
+    auto result = cudf::io::read_parquet(read_args);
+
+    p.first  = p.first < 0 ? 0 : p.first;
+    p.second = p.second < 0 ? num_rows - p.first : p.second;
+    std::vector<cudf::size_type> slice_indices{p.first, p.first + p.second};
+    auto expected = cudf::slice(tbl, slice_indices);
+
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, expected[0]);
+  }
+}
+
 TEST_F(ParquetReaderTest, UserBoundsWithNullsLarge)
 {
   constexpr int num_rows = 30 * 1000000;
@@ -2636,7 +2722,7 @@ TEST_F(ParquetReaderTest, UserBoundsWithNullsLarge)
 TEST_F(ParquetReaderTest, ListUserBoundsWithNullsLarge)
 {
   constexpr int num_rows = 5 * 1000000;
-  auto colp              = make_parquet_list_col<int>(0, num_rows, 5, 8, true);
+  auto colp              = make_parquet_list_list_col<int>(0, num_rows, 5, 8, true);
   cudf::column_view col  = *colp;
 
   // this file will have row groups of 1,000,000 each

From 029b1dbbaf4aa7f2eb19f29a68589e574c0c7230 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 6 Oct 2022 10:41:33 -0700
Subject: [PATCH 012/202] Fix RangeIndex unary operators. (#11868)

These operators rely on a method that was renamed in #11272 and are also out of sync with the rest of the `RangeIndex` design now that the `__getattr__` overload has been removed (#10538).

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/11868
---
 python/cudf/cudf/core/index.py | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index b6ae7beebc5..3734893627f 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -867,15 +867,14 @@ def min(self):
     def max(self):
         return self._minmax("max")
 
+    def __neg__(self):
+        return -self._as_int_index()
 
-# Patch in all binops and unary ops, which bypass __getattr__ on the instance
-# and prevent the above overload from working.
-for unaop in ("__neg__", "__pos__", "__abs__"):
-    setattr(
-        RangeIndex,
-        unaop,
-        lambda self, op=unaop: getattr(self._as_int64(), op)(),
-    )
+    def __pos__(self):
+        return +self._as_int_index()
+
+    def __abs__(self):
+        return abs(self._as_int_index())
 
 
 class GenericIndex(SingleColumnFrame, BaseIndex):

From e323f0a75757374bce17d4c4832f422b4e8c19a3 Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Thu, 6 Oct 2022 15:55:37 -0400
Subject: [PATCH 013/202] Fix make_column_from_scalar for all-null strings
 column (#11807)

Fixes `cudf::make_column_from_scalar` so that an invalid `cudf::string_scalar` produces a column with children. Some libcudf APIs will not work with a strings column that has no children. This condition is rare enough that adding logic to check for missing children in each of those places would be a performance and maintenance burden.
This also greatly simplifies the `make_column_from_scalar` specialization logic for strings.

Closes #11756

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Yunsong Wang (https://github.com/PointKernel)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/11807
---
 cpp/src/column/column_factories.cu  | 12 +++---------
 cpp/src/strings/filling/fill.cu     | 20 ++++++++++++--------
 cpp/tests/column/factories_test.cpp |  2 ++
 cpp/tests/filling/fill_tests.cpp    |  4 ++--
 cpp/tests/strings/fill_tests.cpp    | 10 +++++-----
 5 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/cpp/src/column/column_factories.cu b/cpp/src/column/column_factories.cu
index 90252fd6cf1..c401b765f0b 100644
--- a/cpp/src/column/column_factories.cu
+++ b/cpp/src/column/column_factories.cu
@@ -54,21 +54,15 @@ std::unique_ptr<cudf::column> column_from_scalar_dispatch::operator()<cudf::stri
   rmm::mr::device_memory_resource* mr) const
 {
   if (size == 0) return make_empty_column(value.type());
-  auto null_mask = detail::create_null_mask(size, mask_state::ALL_NULL, stream, mr);
 
-  if (!value.is_valid(stream))
-    return std::make_unique<column>(
-      value.type(), size, rmm::device_buffer{}, std::move(null_mask), size);
-
-  // Create a strings column_view with all nulls and no children.
   // Since we are setting every row to the scalar, the fill() never needs to access
   // any of the children in the strings column which would otherwise cause an exception.
-  column_view sc{
-    data_type{type_id::STRING}, size, nullptr, static_cast<bitmask_type*>(null_mask.data()), size};
+  column_view sc{value.type(), size, nullptr};
   auto& sv = static_cast<scalar_type_t<cudf::string_view> const&>(value);
+
   // fill the column with the scalar
   auto output = strings::detail::fill(strings_column_view(sc), 0, size, sv, stream, mr);
-  output->set_null_mask(rmm::device_buffer{}, 0);  // should be no nulls
+
   return output;
 }
 
diff --git a/cpp/src/strings/filling/fill.cu b/cpp/src/strings/filling/fill.cu
index a858a3d6238..f813ec24ee9 100644
--- a/cpp/src/strings/filling/fill.cu
+++ b/cpp/src/strings/filling/fill.cu
@@ -58,14 +58,18 @@ std::unique_ptr<column> fill(
   auto d_strings      = *strings_column;
 
   // create resulting null mask
-  auto valid_mask = cudf::detail::valid_if(
-    thrust::make_counting_iterator<size_type>(0),
-    thrust::make_counting_iterator<size_type>(strings_count),
-    [d_strings, begin, end, d_value] __device__(size_type idx) {
-      return ((begin <= idx) && (idx < end)) ? d_value.is_valid() : !d_strings.is_null(idx);
-    },
-    stream,
-    mr);
+  auto valid_mask = [begin, end, d_value, value, d_strings, stream, mr] {
+    if (begin == 0 and end == d_strings.size() and value.is_valid(stream))
+      return std::pair(rmm::device_buffer{}, 0);
+    return cudf::detail::valid_if(
+      thrust::make_counting_iterator<size_type>(0),
+      thrust::make_counting_iterator<size_type>(d_strings.size()),
+      [d_strings, begin, end, d_value] __device__(size_type idx) {
+        return ((begin <= idx) && (idx < end)) ? d_value.is_valid() : !d_strings.is_null(idx);
+      },
+      stream,
+      mr);
+  }();
   auto null_count               = valid_mask.second;
   rmm::device_buffer& null_mask = valid_mask.first;
 
diff --git a/cpp/tests/column/factories_test.cpp b/cpp/tests/column/factories_test.cpp
index e8098202fc3..bd37da91f69 100644
--- a/cpp/tests/column/factories_test.cpp
+++ b/cpp/tests/column/factories_test.cpp
@@ -423,6 +423,7 @@ TEST_F(ColumnFactoryTest, FromStringScalar)
   EXPECT_EQ(0, column->null_count());
   EXPECT_FALSE(column->nullable());
   EXPECT_FALSE(column->has_nulls());
+  EXPECT_TRUE(column->num_children() > 0);
 }
 
 TEST_F(ColumnFactoryTest, FromNullStringScalar)
@@ -434,6 +435,7 @@ TEST_F(ColumnFactoryTest, FromNullStringScalar)
   EXPECT_EQ(2, column->null_count());
   EXPECT_TRUE(column->nullable());
   EXPECT_TRUE(column->has_nulls());
+  EXPECT_TRUE(column->num_children() > 0);
 }
 
 TEST_F(ColumnFactoryTest, FromStringScalarWithZeroSize)
diff --git a/cpp/tests/filling/fill_tests.cpp b/cpp/tests/filling/fill_tests.cpp
index f305d4a06c7..ec400fa61c8 100644
--- a/cpp/tests/filling/fill_tests.cpp
+++ b/cpp/tests/filling/fill_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -206,7 +206,7 @@ class FillStringTestFixture : public cudf::test::BaseFixture {
         }));
 
     auto p_ret = cudf::fill(destination, begin, end, *p_val);
-    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*p_ret, expected);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*p_ret, expected);
   }
 };
 
diff --git a/cpp/tests/strings/fill_tests.cpp b/cpp/tests/strings/fill_tests.cpp
index 721fb6d8d33..44bbb3c9c29 100644
--- a/cpp/tests/strings/fill_tests.cpp
+++ b/cpp/tests/strings/fill_tests.cpp
@@ -47,7 +47,7 @@ TEST_F(StringsFillTest, Fill)
       h_expected.begin(),
       h_expected.end(),
       thrust::make_transform_iterator(h_expected.begin(), [](auto str) { return str != nullptr; }));
-    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
   }
   {
     auto results = cudf::strings::detail::fill(view, 2, 4, cudf::string_scalar("", false));
@@ -57,23 +57,23 @@ TEST_F(StringsFillTest, Fill)
       h_expected.begin(),
       h_expected.end(),
       thrust::make_transform_iterator(h_expected.begin(), [](auto str) { return str != nullptr; }));
-    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
   }
   {
     auto results = cudf::strings::detail::fill(view, 5, 5, cudf::string_scalar("zz"));
-    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, view.parent());
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, view.parent());
   }
   {
     auto results = cudf::strings::detail::fill(view, 0, 7, cudf::string_scalar(""));
     cudf::test::strings_column_wrapper expected({"", "", "", "", "", "", ""},
                                                 {1, 1, 1, 1, 1, 1, 1});
-    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
   }
   {
     auto results = cudf::strings::detail::fill(view, 0, 7, cudf::string_scalar("", false));
     cudf::test::strings_column_wrapper expected({"", "", "", "", "", "", ""},
                                                 {0, 0, 0, 0, 0, 0, 0});
-    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
   }
 }
 

From 1ef722d690bddfc1df48577dada44afe5f5d5aa0 Mon Sep 17 00:00:00 2001
From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com>
Date: Fri, 7 Oct 2022 02:23:36 +0530
Subject: [PATCH 014/202] Fix decimal benchmark input data generation (#11863)

closes https://github.com/rapidsai/cudf/issues/11850
Fixes decimal benchmark input data generation.
The generated data alternated between two values because each element of `device_uvector<T>` holds both a value and a scale. The scale is fixed for a column, so when this data was copied into a `cudf::column`, the column's contents alternated between values and the scale.
Fix is to use `device_storage_type_t<T>` instead of `T`.

Authors:
  - Karthikeyan (https://github.com/karthikeyann)

Approvers:
  - Vukasin Milovanovic (https://github.com/vuule)
  - Nghia Truong (https://github.com/ttnghia)
  - David Wendt (https://github.com/davidwendt)

URL: https://github.com/rapidsai/cudf/pull/11863
---
 cpp/benchmarks/common/generate_input.cu | 52 +++++++++++++------------
 1 file changed, 27 insertions(+), 25 deletions(-)

diff --git a/cpp/benchmarks/common/generate_input.cu b/cpp/benchmarks/common/generate_input.cu
index 890a78bb9bf..2bcdaa6760c 100644
--- a/cpp/benchmarks/common/generate_input.cu
+++ b/cpp/benchmarks/common/generate_input.cu
@@ -247,40 +247,33 @@ struct random_value_fn<T, std::enable_if_t<cudf::is_chrono<T>()>> {
  */
 template <typename T>
 struct random_value_fn<T, std::enable_if_t<cudf::is_fixed_point<T>()>> {
-  using rep = typename T::rep;
-  rep const lower_bound;
-  rep const upper_bound;
-  distribution_fn<rep> dist;
+  using DeviceType = cudf::device_storage_type_t<T>;
+  DeviceType const lower_bound;
+  DeviceType const upper_bound;
+  distribution_fn<DeviceType> dist;
   std::optional<numeric::scale_type> scale;
 
-  random_value_fn(distribution_params<rep> const& desc)
+  random_value_fn(distribution_params<DeviceType> const& desc)
     : lower_bound{desc.lower_bound},
       upper_bound{desc.upper_bound},
-      dist{make_distribution<rep>(desc.id, desc.lower_bound, desc.upper_bound)}
+      dist{make_distribution<DeviceType>(desc.id, desc.lower_bound, desc.upper_bound)}
   {
   }
 
-  rmm::device_uvector<T> operator()(thrust::minstd_rand& engine, unsigned size)
+  [[nodiscard]] numeric::scale_type get_scale(thrust::minstd_rand& engine)
   {
     if (not scale.has_value()) {
-      int const max_scale = std::numeric_limits<rep>::digits10;
+      constexpr int max_scale = std::numeric_limits<DeviceType>::digits10;
       std::uniform_int_distribution<int> scale_dist{-max_scale, max_scale};
       std::mt19937 engine_scale(engine());
       scale = numeric::scale_type{scale_dist(engine_scale)};
     }
-    auto const ints = dist(engine, size);
-    rmm::device_uvector<T> result(size, cudf::default_stream_value);
-    // Clamp the generated random value to the specified range
-    thrust::transform(thrust::device,
-                      ints.begin(),
-                      ints.end(),
-                      result.begin(),
-                      [scale       = *(this->scale),
-                       upper_bound = this->upper_bound,
-                       lower_bound = this->lower_bound] __device__(auto int_value) {
-                        return T{std::clamp(int_value, lower_bound, upper_bound), scale};
-                      });
-    return result;
+    return scale.value_or(numeric::scale_type{0});
+  }
+
+  rmm::device_uvector<DeviceType> operator()(thrust::minstd_rand& engine, unsigned size)
+  {
+    return dist(engine, size);
   }
 };
 
@@ -398,9 +391,17 @@ std::unique_ptr<cudf::column> create_random_column(data_profile const& profile,
     distribution_params<bool>{1. - profile.get_null_probability().value_or(0)});
   auto value_dist = random_value_fn<T>{profile.get_distribution_params<T>()};
 
+  using DeviceType            = cudf::device_storage_type_t<T>;
+  cudf::data_type const dtype = [&]() {
+    if constexpr (cudf::is_fixed_point<T>())
+      return cudf::data_type{cudf::type_to_id<T>(), value_dist.get_scale(engine)};
+    else
+      return cudf::data_type{cudf::type_to_id<T>()};
+  }();
+
   // Distribution for picking elements from the array of samples
   auto const avg_run_len = profile.get_avg_run_length();
-  rmm::device_uvector<T> data(0, cudf::default_stream_value);
+  rmm::device_uvector<DeviceType> data(0, cudf::default_stream_value);
   rmm::device_uvector<bool> null_mask(0, cudf::default_stream_value);
 
   if (profile.get_cardinality() == 0 and avg_run_len == 1) {
@@ -412,11 +413,12 @@ std::unique_ptr<cudf::column> create_random_column(data_profile const& profile,
                                                                           : profile_cardinality;
     }();
     rmm::device_uvector<bool> samples_null_mask = valid_dist(engine, cardinality);
-    rmm::device_uvector<T> samples              = value_dist(engine, cardinality);
+    rmm::device_uvector<DeviceType> samples     = value_dist(engine, cardinality);
+
     // generate n samples and gather.
     auto const sample_indices =
       sample_indices_with_run_length(avg_run_len, cardinality, num_rows, engine);
-    data      = rmm::device_uvector<T>(num_rows, cudf::default_stream_value);
+    data      = rmm::device_uvector<DeviceType>(num_rows, cudf::default_stream_value);
     null_mask = rmm::device_uvector<bool>(num_rows, cudf::default_stream_value);
     thrust::gather(
       thrust::device, sample_indices.begin(), sample_indices.end(), samples.begin(), data.begin());
@@ -431,7 +433,7 @@ std::unique_ptr<cudf::column> create_random_column(data_profile const& profile,
     cudf::detail::valid_if(null_mask.begin(), null_mask.end(), thrust::identity<bool>{});
 
   return std::make_unique<cudf::column>(
-    cudf::data_type{cudf::type_to_id<T>()},
+    dtype,
     num_rows,
     data.release(),
     profile.get_null_probability().has_value() ? std::move(result_bitmask) : rmm::device_buffer{});

From e20eb94aedf5c8cc5c3f5ce3405e0dab1ace6f63 Mon Sep 17 00:00:00 2001
From: Sheilah Kirui <71867292+skirui-source@users.noreply.github.com>
Date: Thu, 6 Oct 2022 15:15:17 -0700
Subject: [PATCH 015/202] part1: Simplify BaseIndex to an abstract class
 (#10389)

This PR is in response to @vyasr's comment, as a partial fix for issue https://github.com/rapidsai/cudf/issues/9593:

`BaseIndex` should be reduced as closely as possible to an abstract class. While there is a subset of APIs that truly make sense for all types of index objects, in almost all cases the optimal implementation for `RangeIndex` (and `MultiIndex`, for that matter) is very different from the implementation for `GenericIndex`. In addition, this change reduces cognitive load for developers by simplifying the inheritance hierarchy.

Authors:
  - Sheilah Kirui (https://github.com/skirui-source)

Approvers:
  - Matthew Roeschke (https://github.com/mroeschke)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/10389
---
 .../cudf/benchmarks/API/bench_rangeindex.py   |   5 +
 python/cudf/cudf/core/_base_index.py          | 240 ++++++++----------
 python/cudf/cudf/core/column/categorical.py   |   2 +-
 python/cudf/cudf/core/index.py                | 110 +++++++-
 python/cudf/cudf/tests/test_index.py          | 136 ++++++++--
 5 files changed, 332 insertions(+), 161 deletions(-)

diff --git a/python/cudf/benchmarks/API/bench_rangeindex.py b/python/cudf/benchmarks/API/bench_rangeindex.py
index 7b2baef9081..42de5a86b65 100644
--- a/python/cudf/benchmarks/API/bench_rangeindex.py
+++ b/python/cudf/benchmarks/API/bench_rangeindex.py
@@ -40,3 +40,8 @@ def bench_min(benchmark, rangeindex):
 def bench_where(benchmark, rangeindex):
     cond = rangeindex % 2 == 0
     benchmark(rangeindex.where, cond, 0)
+
+
+def bench_isin(benchmark, rangeindex):
+    values = [10, 100]
+    benchmark(rangeindex.isin, values)
diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py
index 6898ae4941c..b73536558f1 100644
--- a/python/cudf/cudf/core/_base_index.py
+++ b/python/cudf/cudf/core/_base_index.py
@@ -27,10 +27,7 @@
 from cudf.core.column import ColumnBase, column
 from cudf.core.column_accessor import ColumnAccessor
 from cudf.utils import ioutils
-from cudf.utils.dtypes import (
-    is_mixed_with_object_dtype,
-    numeric_normalize_types,
-)
+from cudf.utils.dtypes import is_mixed_with_object_dtype
 
 _index_astype_docstring = """\
 Create an Index with values cast to dtypes.
@@ -90,7 +87,7 @@ def size(self):
 
     @property
     def values(self):
-        return self._values.values
+        raise NotImplementedError
 
     def get_loc(self, key, method=None, tolerance=None):
         raise NotImplementedError
@@ -188,12 +185,7 @@ def _clean_nulls_from_index(self):
         methods using this method to replace or handle representation
         of the actual types correctly.
         """
-        if self._values.has_nulls():
-            return cudf.Index(
-                self._values.astype("str").fillna(cudf._NA_REP), name=self.name
-            )
-        else:
-            return self
+        raise NotImplementedError
 
     @property
     def is_monotonic(self):
@@ -549,13 +541,11 @@ def to_frame(self, index=True, name=None):
             Set the index of the returned DataFrame as the original Index
         name : str, default None
             Name to be used for the column
-
         Returns
         -------
         DataFrame
             cudf DataFrame
         """
-
         if name is not None:
             col_name = name
         elif self.name is None:
@@ -570,7 +560,40 @@ def any(self):
         """
         Return whether any elements is True in Index.
         """
-        return self._values.any()
+        raise NotImplementedError
+
+    def isna(self):
+        """
+        Detect missing values.
+
+        Return a boolean same-sized object indicating if the values are NA.
+        NA values, such as ``None``, :attr:`numpy.NaN` or :attr:`cudf.NaN`, get
+        mapped to ``True`` values.
+        Everything else get mapped to ``False`` values.
+
+        Returns
+        -------
+        numpy.ndarray[bool]
+            A boolean array to indicate which entries are NA.
+
+        """
+        raise NotImplementedError
+
+    def notna(self):
+        """
+        Detect existing (non-missing) values.
+
+        Return a boolean same-sized object indicating if the values are not NA.
+        Non-missing values get mapped to ``True``.
+        NA values, such as None or :attr:`numpy.NaN`, get mapped to ``False``
+        values.
+
+        Returns
+        -------
+        numpy.ndarray[bool]
+            A boolean array to indicate which entries are not NA.
+        """
+        raise NotImplementedError
 
     def to_pandas(self):
         """
@@ -589,7 +612,75 @@ def to_pandas(self):
         >>> type(idx)
         <class 'cudf.core.index.Int64Index'>
         """
-        return pd.Index(self._values.to_pandas(), name=self.name)
+        raise NotImplementedError
+
+    def isin(self, values):
+        """Return a boolean array where the index values are in values.
+
+        Compute boolean array of whether each index value is found in
+        the passed set of values. The length of the returned boolean
+        array matches the length of the index.
+
+        Parameters
+        ----------
+        values : set, list-like, Index
+            Sought values.
+
+        Returns
+        -------
+        is_contained : cupy array
+            CuPy array of boolean values.
+
+        Examples
+        --------
+        >>> idx = cudf.Index([1,2,3])
+        >>> idx
+        Int64Index([1, 2, 3], dtype='int64')
+
+        Check whether each index value in a list of values.
+
+        >>> idx.isin([1, 4])
+        array([ True, False, False])
+        """
+        # To match pandas behavior, even though only list-like objects are
+        # supposed to be passed, only scalars throw errors. Other types (like
+        # dicts) just transparently return False (see the implementation of
+        # ColumnBase.isin).
+        raise NotImplementedError
+
+    def unique(self):
+        """
+        Return unique values in the index.
+
+        Returns
+        -------
+        Index without duplicates
+        """
+        raise NotImplementedError
+
+    def to_series(self, index=None, name=None):
+        """
+        Create a Series with both index and values equal to the index keys.
+        Useful with map for returning an indexer based on an index.
+
+        Parameters
+        ----------
+        index : Index, optional
+            Index of resulting Series. If None, defaults to original index.
+        name : str, optional
+            Name of resulting Series. If None, defaults to name of original
+            index.
+
+        Returns
+        -------
+        Series
+            The dtype will be based on the type of the Index values.
+        """
+        return cudf.Series._from_data(
+            self._data,
+            index=self.copy(deep=False) if index is None else index,
+            name=self.name if name is None else name,
+        )
 
     @ioutils.doc_to_dlpack()
     def to_dlpack(self):
@@ -599,7 +690,7 @@ def to_dlpack(self):
 
     def append(self, other):
         """
-        Append a collection of Index options together.
+        Append a collection of Index objects together.
 
         Parameters
         ----------
@@ -626,45 +717,7 @@ def append(self, other):
         >>> idx.append([other, other])
         Int64Index([1, 2, 10, 100, 200, 400, 50, 200, 400, 50], dtype='int64')
         """
-
-        if is_list_like(other):
-            to_concat = [self]
-            to_concat.extend(other)
-        else:
-            this = self
-            if len(other) == 0:
-                # short-circuit and return a copy
-                to_concat = [self]
-
-            other = cudf.Index(other)
-
-            if len(self) == 0:
-                to_concat = [other]
-
-            if len(self) and len(other):
-                if is_mixed_with_object_dtype(this, other):
-                    got_dtype = (
-                        other.dtype
-                        if this.dtype == cudf.dtype("object")
-                        else this.dtype
-                    )
-                    raise TypeError(
-                        f"cudf does not support appending an Index of "
-                        f"dtype `{cudf.dtype('object')}` with an Index "
-                        f"of dtype `{got_dtype}`, please type-cast "
-                        f"either one of them to same dtypes."
-                    )
-
-                if isinstance(self._values, cudf.core.column.NumericalColumn):
-                    if self.dtype != other.dtype:
-                        this, other = numeric_normalize_types(self, other)
-                to_concat = [this, other]
-
-        for obj in to_concat:
-            if not isinstance(obj, BaseIndex):
-                raise TypeError("all inputs must be Index")
-
-        return self._concat(to_concat)
+        raise NotImplementedError
 
     def difference(self, other, sort=None):
         """
@@ -1119,18 +1172,6 @@ def sort_values(
         else:
             return index_sorted
 
-    def unique(self):
-        """
-        Return unique values in the index.
-
-        Returns
-        -------
-        Index without duplicates
-        """
-        return cudf.core.index._index_from_data(
-            {self.name: self._values.unique()}, name=self.name
-        )
-
     def join(
         self, other, how="left", level=None, return_indexers=False, sort=False
     ):
@@ -1263,30 +1304,6 @@ def rename(self, name, inplace=False):
             out.name = name
             return out
 
-    def to_series(self, index=None, name=None):
-        """
-        Create a Series with both index and values equal to the index keys.
-        Useful with map for returning an indexer based on an index.
-
-        Parameters
-        ----------
-        index : Index, optional
-            Index of resulting Series. If None, defaults to original index.
-        name : str, optional
-            Dame of resulting Series. If None, defaults to name of original
-            index.
-
-        Returns
-        -------
-        Series
-            The dtype will be based on the type of the Index values.
-        """
-        return cudf.Series(
-            self._values,
-            index=self.copy(deep=False) if index is None else index,
-            name=self.name if name is None else name,
-        )
-
     def get_slice_bound(self, label, side, kind=None):
         """
         Calculate slice bound that corresponds to given label.
@@ -1339,47 +1356,6 @@ def __array_function__(self, func, types, args, kwargs):
         else:
             return NotImplemented
 
-    def isin(self, values):
-        """Return a boolean array where the index values are in values.
-
-        Compute boolean array of whether each index value is found in
-        the passed set of values. The length of the returned boolean
-        array matches the length of the index.
-
-        Parameters
-        ----------
-        values : set, list-like, Index
-            Sought values.
-
-        Returns
-        -------
-        is_contained : cupy array
-            CuPy array of boolean values.
-
-        Examples
-        --------
-        >>> idx = cudf.Index([1,2,3])
-        >>> idx
-        Int64Index([1, 2, 3], dtype='int64')
-
-        Check whether each index value in a list of values.
-
-        >>> idx.isin([1, 4])
-        array([ True, False, False])
-        """
-
-        # To match pandas behavior, even though only list-like objects are
-        # supposed to be passed, only scalars throw errors. Other types (like
-        # dicts) just transparently return False (see the implementation of
-        # ColumnBase.isin).
-        if is_scalar(values):
-            raise TypeError(
-                "only list-like objects are allowed to be passed "
-                f"to isin(), you passed a {type(values).__name__}"
-            )
-
-        return self._values.isin(values).values
-
     @classmethod
     def from_pandas(cls, index, nan_as_null=None):
         """
diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py
index 601ad707ba6..af5d140a20a 100644
--- a/python/cudf/cudf/core/column/categorical.py
+++ b/python/cudf/cudf/core/column/categorical.py
@@ -104,7 +104,7 @@ def __init__(self, parent: SeriesOrSingleColumnIndex):
         super().__init__(parent=parent)
 
     @property
-    def categories(self) -> "cudf.core.index.BaseIndex":
+    def categories(self) -> "cudf.core.index.GenericIndex":
         """
         The categories of this categorical.
         """
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 3734893627f..3d77ed15027 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -33,6 +33,8 @@
     is_categorical_dtype,
     is_dtype_equal,
     is_interval_dtype,
+    is_list_like,
+    is_scalar,
     is_string_dtype,
 )
 from cudf.core._base_index import BaseIndex, _index_astype_docstring
@@ -55,7 +57,12 @@
 from cudf.core.mixins import BinaryOperand
 from cudf.core.single_column_frame import SingleColumnFrame
 from cudf.utils.docutils import copy_docstring, doc_apply
-from cudf.utils.dtypes import _maybe_convert_to_default_type, find_common_type
+from cudf.utils.dtypes import (
+    _maybe_convert_to_default_type,
+    find_common_type,
+    is_mixed_with_object_dtype,
+    numeric_normalize_types,
+)
 from cudf.utils.utils import _cudf_nvtx_annotate, search_range
 
 T = TypeVar("T", bound="Frame")
@@ -243,6 +250,9 @@ def _values(self):
         else:
             return column.column_empty(0, masked=False, dtype=self.dtype)
 
+    def _clean_nulls_from_index(self):
+        return self
+
     def is_numeric(self):
         return True
 
@@ -867,6 +877,25 @@ def min(self):
     def max(self):
         return self._minmax("max")
 
+    @property
+    def values(self):
+        return cupy.arange(self.start, self.stop, self.step)
+
+    def any(self):
+        return any(self._range)
+
+    def append(self, other):
+        return self._as_int_index().append(other)
+
+    def isin(self, values):
+        if is_scalar(values):
+            raise TypeError(
+                "only list-like objects are allowed to be passed "
+                f"to isin(), you passed a {type(values).__name__}"
+            )
+
+        return self._values.isin(values).values
+
     def __neg__(self):
         return -self._as_int_index()
 
@@ -1409,6 +1438,81 @@ def where(self, cond, other=None, inplace=False):
             inplace=inplace,
         )
 
+    @property
+    def values(self):
+        return self._column.values
+
+    def __contains__(self, item):
+        return item in self._values
+
+    def _clean_nulls_from_index(self):
+        if self._values.has_nulls():
+            return cudf.Index(
+                self._values.astype("str").fillna(cudf._NA_REP), name=self.name
+            )
+
+        return self
+
+    def any(self):
+        return self._values.any()
+
+    def to_pandas(self):
+        return pd.Index(self._values.to_pandas(), name=self.name)
+
+    def append(self, other):
+        if is_list_like(other):
+            to_concat = [self]
+            to_concat.extend(other)
+        else:
+            this = self
+            if len(other) == 0:
+                # short-circuit and return a copy
+                to_concat = [self]
+
+            other = cudf.Index(other)
+
+            if len(self) == 0:
+                to_concat = [other]
+
+            if len(self) and len(other):
+                if is_mixed_with_object_dtype(this, other):
+                    got_dtype = (
+                        other.dtype
+                        if this.dtype == cudf.dtype("object")
+                        else this.dtype
+                    )
+                    raise TypeError(
+                        f"cudf does not support appending an Index of "
+                        f"dtype `{cudf.dtype('object')}` with an Index "
+                        f"of dtype `{got_dtype}`, please type-cast "
+                        f"either one of them to same dtypes."
+                    )
+
+                if isinstance(self._values, cudf.core.column.NumericalColumn):
+                    if self.dtype != other.dtype:
+                        this, other = numeric_normalize_types(self, other)
+                to_concat = [this, other]
+
+        for obj in to_concat:
+            if not isinstance(obj, BaseIndex):
+                raise TypeError("all inputs must be Index")
+
+        return self._concat(to_concat)
+
+    def unique(self):
+        return cudf.core.index._index_from_data(
+            {self.name: self._values.unique()}, name=self.name
+        )
+
+    def isin(self, values):
+        if is_scalar(values):
+            raise TypeError(
+                "only list-like objects are allowed to be passed "
+                f"to isin(), you passed a {type(values).__name__}"
+            )
+
+        return self._values.isin(values).values
+
 
 class NumericIndex(GenericIndex):
     """Immutable, ordered and sliceable sequence of labels.
@@ -2796,10 +2900,6 @@ def str(self):
         return StringMethods(parent=self)
 
     def _clean_nulls_from_index(self):
-        """
-        Convert all na values(if any) in Index object
-        to `<NA>` as a preprocessing step to `__repr__` methods.
-        """
         if self._values.has_nulls():
             return self.fillna(cudf._NA_REP)
         else:
diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py
index e8c568979a3..358d5e2170e 100644
--- a/python/cudf/cudf/tests/test_index.py
+++ b/python/cudf/cudf/tests/test_index.py
@@ -2537,32 +2537,20 @@ def rangeindex(request):
     return RangeIndex(request.param)
 
 
-def test_rangeindex_nunique(rangeindex):
-    gidx = rangeindex
-    pidx = gidx.to_pandas()
-
-    actual = gidx.nunique()
-    expected = pidx.nunique()
-
-    assert_eq(expected, actual)
-
-
-def test_rangeindex_min(rangeindex):
-    gidx = rangeindex
-    pidx = gidx.to_pandas()
-
-    actual = gidx.min()
-    expected = pidx.min()
-
-    assert_eq(expected, actual)
-
-
-def test_rangeindex_max(rangeindex):
+@pytest.mark.parametrize(
+    "func",
+    ["nunique", "min", "max", "any", "values"],
+)
+def test_rangeindex_methods(rangeindex, func):
     gidx = rangeindex
     pidx = gidx.to_pandas()
 
-    actual = gidx.max()
-    expected = pidx.max()
+    if func == "values":
+        expected = pidx.values
+        actual = gidx.values
+    else:
+        expected = getattr(pidx, func)()
+        actual = getattr(gidx, func)()
 
     assert_eq(expected, actual)
 
@@ -2693,3 +2681,105 @@ def test_rangeindex_where_user_option(default_integer_bitwidth):
         dtype=f"int{default_integer_bitwidth}",
     )
     assert_eq(expected, actual)
+
+
+index_data = [
+    range(np.random.randint(0, 100)),
+    range(0, 10, -2),
+    range(0, -10, 2),
+    range(0, -10, -2),
+    range(0, 1),
+    [1, 2, 3, 1, None, None],
+    [None, None, 3.2, 1, None, None],
+    [None, "a", "3.2", "z", None, None],
+    pd.Series(["a", "b", None], dtype="category"),
+    np.array([1, 2, 3, None], dtype="datetime64[s]"),
+]
+
+
+@pytest.fixture(params=index_data)
+def index(request):
+    """Create a cudf Index of different dtypes"""
+    return cudf.Index(request.param)
+
+
+@pytest.mark.parametrize(
+    "func",
+    [
+        "to_series",
+        "isna",
+        "notna",
+        "append",
+    ],
+)
+def test_index_methods(index, func):
+    gidx = index
+    pidx = gidx.to_pandas()
+
+    if func == "append":
+        expected = pidx.append(other=pidx)
+        actual = gidx.append(other=gidx)
+    else:
+        expected = getattr(pidx, func)()
+        actual = getattr(gidx, func)()
+
+    assert_eq(expected, actual)
+
+
+@pytest.mark.parametrize(
+    "idx, values",
+    [
+        (range(100, 1000, 10), [200, 600, 800]),
+        ([None, "a", "3.2", "z", None, None], ["a", "z"]),
+        (pd.Series(["a", "b", None], dtype="category"), [10, None]),
+    ],
+)
+def test_index_isin_values(idx, values):
+    gidx = cudf.Index(idx)
+    pidx = gidx.to_pandas()
+
+    actual = gidx.isin(values)
+    expected = pidx.isin(values)
+
+    assert_eq(expected, actual)
+
+
+@pytest.mark.parametrize(
+    "idx, scalar",
+    [
+        (range(0, -10, -2), -4),
+        ([None, "a", "3.2", "z", None, None], "x"),
+        (pd.Series(["a", "b", None], dtype="category"), 10),
+    ],
+)
+def test_index_isin_scalar_values(idx, scalar):
+    gidx = cudf.Index(idx)
+
+    with pytest.raises(
+        TypeError,
+        match=re.escape(
+            f"only list-like objects are allowed to be passed "
+            f"to isin(), you passed a {type(scalar).__name__}"
+        ),
+    ):
+        gidx.isin(scalar)
+
+
+def test_index_any():
+    gidx = cudf.Index([1, 2, 3])
+    pidx = gidx.to_pandas()
+
+    assert_eq(pidx.any(), gidx.any())
+
+
+def test_index_values():
+    gidx = cudf.Index([1, 2, 3])
+    pidx = gidx.to_pandas()
+
+    assert_eq(pidx.values, gidx.values)
+
+
+def test_index_null_values():
+    gidx = cudf.Index([1.0, None, 3, 0, None])
+    with pytest.raises(ValueError):
+        gidx.values

From 4c4acd546ccab233bfcf495ab08ad7be8303a30d Mon Sep 17 00:00:00 2001
From: Tobias Ribizel <tribizel@nvidia.com>
Date: Sat, 8 Oct 2022 01:03:03 +0200
Subject: [PATCH 016/202] Add BGZIP reader to python `read_text` (#11802)

Adds the missing Python integration for the BGZIP reader, plus some tests. I decided to extend the `read_text` interface rather than add a new one. For details on the BGZIP format, see #11652.

Authors:
  - Tobias Ribizel (https://github.com/upsj)

Approvers:
  - Ashwin Srinath (https://github.com/shwina)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/11802
---
 python/cudf/cudf/_lib/cpp/io/text.pxd         |   7 +++
 python/cudf/cudf/_lib/text.pyx                |  35 ++++++++++--
 python/cudf/cudf/io/text.py                   |  13 ++++-
 python/cudf/cudf/tests/data/text/chess.pgn.gz | Bin 0 -> 881 bytes
 python/cudf/cudf/tests/test_text.py           |  51 ++++++++++++++++++
 python/cudf/cudf/utils/ioutils.py             |  10 ++++
 6 files changed, 110 insertions(+), 6 deletions(-)
 create mode 100644 python/cudf/cudf/tests/data/text/chess.pgn.gz

diff --git a/python/cudf/cudf/_lib/cpp/io/text.pxd b/python/cudf/cudf/_lib/cpp/io/text.pxd
index 5b110d6234c..7bbe870dad3 100644
--- a/python/cudf/cudf/_lib/cpp/io/text.pxd
+++ b/python/cudf/cudf/_lib/cpp/io/text.pxd
@@ -1,5 +1,6 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
+from libc.stdint cimport uint64_t
 from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
 
@@ -25,6 +26,12 @@ cdef extern from "cudf/io/text/data_chunk_source_factories.hpp" \
     unique_ptr[data_chunk_source] make_source(string data) except +
     unique_ptr[data_chunk_source] \
         make_source_from_file(string filename) except +
+    unique_ptr[data_chunk_source] \
+        make_source_from_bgzip_file(string filename) except +
+    unique_ptr[data_chunk_source] \
+        make_source_from_bgzip_file(string filename,
+                                    uint64_t virtual_begin,
+                                    uint64_t virtual_end) except +
 
 
 cdef extern from "cudf/io/text/multibyte_split.hpp" \
diff --git a/python/cudf/cudf/_lib/text.pyx b/python/cudf/cudf/_lib/text.pyx
index 868574be187..31a5617af58 100644
--- a/python/cudf/cudf/_lib/text.pyx
+++ b/python/cudf/cudf/_lib/text.pyx
@@ -5,6 +5,7 @@ from io import TextIOBase
 import cudf
 
 from cython.operator cimport dereference
+from libc.stdint cimport uint64_t
 from libcpp.memory cimport make_unique, unique_ptr
 from libcpp.string cimport string
 from libcpp.utility cimport move
@@ -15,6 +16,7 @@ from cudf._lib.cpp.io.text cimport (
     byte_range_info,
     data_chunk_source,
     make_source,
+    make_source_from_bgzip_file,
     make_source_from_file,
     multibyte_split,
 )
@@ -22,7 +24,9 @@ from cudf._lib.cpp.io.text cimport (
 
 def read_text(object filepaths_or_buffers,
               object delimiter=None,
-              object byte_range=None):
+              object byte_range=None,
+              object compression=None,
+              object compression_offsets=None):
     """
     Cython function to call into libcudf API, see `multibyte_split`.
 
@@ -38,11 +42,34 @@ def read_text(object filepaths_or_buffers,
     cdef size_t c_byte_range_offset
     cdef size_t c_byte_range_size
     cdef byte_range_info c_byte_range
+    cdef uint64_t c_compression_begin_offset
+    cdef uint64_t c_compression_end_offset
 
-    if isinstance(filepaths_or_buffers, TextIOBase):
-        datasource = move(make_source(filepaths_or_buffers.read().encode()))
+    if compression is None:
+        if isinstance(filepaths_or_buffers, TextIOBase):
+            datasource = move(make_source(
+                filepaths_or_buffers.read().encode()))
+        else:
+            datasource = move(make_source_from_file(
+                filepaths_or_buffers.encode()))
+    elif compression == "bgzip":
+        if isinstance(filepaths_or_buffers, TextIOBase):
+            raise ValueError("bgzip compression requires a file path")
+        if compression_offsets is not None:
+            if len(compression_offsets) != 2:
+                raise ValueError(
+                    "compression offsets need to consist of two elements")
+            c_compression_begin_offset = compression_offsets[0]
+            c_compression_end_offset = compression_offsets[1]
+            datasource = move(make_source_from_bgzip_file(
+                filepaths_or_buffers.encode(),
+                c_compression_begin_offset,
+                c_compression_end_offset))
+        else:
+            datasource = move(make_source_from_bgzip_file(
+                filepaths_or_buffers.encode()))
     else:
-        datasource = move(make_source_from_file(filepaths_or_buffers.encode()))
+        raise ValueError("Only bgzip compression is supported at the moment")
 
     if (byte_range is None):
         with nogil:
diff --git a/python/cudf/cudf/io/text.py b/python/cudf/cudf/io/text.py
index 12aa0f6ef8b..23983f01966 100644
--- a/python/cudf/cudf/io/text.py
+++ b/python/cudf/cudf/io/text.py
@@ -14,11 +14,16 @@ def read_text(
     filepath_or_buffer,
     delimiter=None,
     byte_range=None,
+    compression=None,
+    compression_offsets=None,
     **kwargs,
 ):
     """{docstring}"""
 
-    filepath_or_buffer, compression = ioutils.get_reader_filepath_or_buffer(
+    if delimiter is None:
+        raise ValueError("delimiter needs to be provided")
+
+    filepath_or_buffer, _ = ioutils.get_reader_filepath_or_buffer(
         path_or_data=filepath_or_buffer,
         compression=None,
         iotypes=(BytesIO, StringIO),
@@ -27,6 +32,10 @@ def read_text(
 
     return cudf.Series._from_data(
         libtext.read_text(
-            filepath_or_buffer, delimiter=delimiter, byte_range=byte_range
+            filepath_or_buffer,
+            delimiter=delimiter,
+            byte_range=byte_range,
+            compression=compression,
+            compression_offsets=compression_offsets,
         )
     )
diff --git a/python/cudf/cudf/tests/data/text/chess.pgn.gz b/python/cudf/cudf/tests/data/text/chess.pgn.gz
new file mode 100644
index 0000000000000000000000000000000000000000..f03d0d0f73da338711a703032503539090b4a9fc
GIT binary patch
literal 881
zcmZva%Wl&^6ox0ELddUxM1N&b7D=7X_>x3oWlBLcxWwVIh**s6i5)dcs$41;#DX=i
zz#AaJjwM^z@Br+24HW)~SV1l2XykMGzB%#!`&)a0&)$ujBD&uc1JSrGer?AOF3o(6
z)=77S)T}qlIiBcsUbS91mmj~eM2P)=I-iP$5<j-%(R6Llip;EB>cVs|GRthLar*or
zo5#o6X60a@Tq)fnpYOV#zJF2I41CvY<joJi$LV4-FVIp0+yAoKjX#;L^2#hbNEew|
zu5s$xWHi(HIa;F&y;_}Lbr3C<(-mIqa;aHuX1tN?szd%*?noELhY6ruBohxw9>Q}G
zWdU@EH_s|IFD}e{IxoRvu4gk-U|kucn=3qK>b3jU@!6q$4hRvrI6FK;WO@i)?oEQD
zdUm3RB1GRs?(HfDE-Lb}KID;$g56osbGQ)XK4>9nl-Jh721%)eol(|<w74=r=0g&f
zWD*tYiHj<>vY5Q0Hi47WyqqL6s_8SdL6U1~nhYtcNG4Vxv2PUF3E@f2poov`A<05V
zSvr@LYSV|Z_<GW7BV~lL3igoJMz*1-L6AheOVVU$JPfI;=rOFzz^NFZ=)sl5hWn<4
z=8*~>6?k9X!Um5HsSc170X!=3|B3QGJPO1Ug=i8u9_gv>p=7V!w+`EgxdM;im=?)|
zV+by}>YDrZh>dySm^<~!O$oL2e*Gx@oHzgfq>n}8K>Xa&i?f}CSf5s<kErnAQ!3>R
Z{(ex-*L!<+53c!)?}&yF+buJ@=MSea)6oC`

literal 0
HcmV?d00001

diff --git a/python/cudf/cudf/tests/test_text.py b/python/cudf/cudf/tests/test_text.py
index a4edaeff545..7f41d606473 100644
--- a/python/cudf/cudf/tests/test_text.py
+++ b/python/cudf/cudf/tests/test_text.py
@@ -845,3 +845,54 @@ def test_read_text_in_memory(datadir):
     actual = cudf.read_text(StringIO("x::y::z"), delimiter="::")
 
     assert_eq(expected, actual)
+
+
+def test_read_text_bgzip(datadir):
+    chess_file_compressed = str(datadir) + "/chess.pgn.gz"
+    chess_file = str(datadir) + "/chess.pgn"
+    delimiter = "1."
+
+    with open(chess_file) as f:
+        content = f.read().split(delimiter)
+
+    # Python's split removes the delimiter but read_text keeps it,
+    # so we add it back to every element of 'content' except the last
+    expected = cudf.Series(
+        [
+            c + delimiter if i < (len(content) - 1) else c
+            for i, c in enumerate(content)
+        ]
+    )
+
+    actual = cudf.read_text(
+        chess_file_compressed, compression="bgzip", delimiter=delimiter
+    )
+
+    assert_eq(expected, actual)
+
+
+def test_read_text_bgzip_offsets(datadir):
+    chess_file_compressed = str(datadir) + "/chess.pgn.gz"
+    chess_file = str(datadir) + "/chess.pgn"
+    delimiter = "1."
+
+    with open(chess_file) as f:
+        content = f.read()[29:695].split(delimiter)
+
+    # Python's split removes the delimiter but read_text keeps it,
+    # so we add it back to every element of 'content' except the last
+    expected = cudf.Series(
+        [
+            c + delimiter if i < (len(content) - 1) else c
+            for i, c in enumerate(content)
+        ]
+    )
+
+    actual = cudf.read_text(
+        chess_file_compressed,
+        compression="bgzip",
+        compression_offsets=[58 * 2**16 + 2, 781 * 2**16 + 7],
+        delimiter=delimiter,
+    )
+
+    assert_eq(expected, actual)
diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py
index fe65b8f22fc..8bb246c9c84 100644
--- a/python/cudf/cudf/utils/ioutils.py
+++ b/python/cudf/cudf/utils/ioutils.py
@@ -1172,6 +1172,16 @@
     The output contains all rows that start inside the byte range
     (i.e. at or after the offset, and before the end at `offset + size`),
     which may include rows that continue past the end.
+compression : string, default None
+    The type of compression the input is compressed with.
+    Currently supports only `bgzip`, and requires the path to a file as input.
+compression_offsets : list or tuple, default None
+    The virtual begin and end offset associated with the provided compression.
+    For `bgzip`, they are composed of a local uncompressed offset inside a
+    BGZIP block (lower 16 bits) and the start offset of this BGZIP block in the
+    compressed file (upper 48 bits).
+    The start offset points to the first byte to be read, the end offset points
+    one past the last byte to be read.
 
 Returns
 -------

From 4eb9c6c945674a56eaed740f5411860d4441c9f3 Mon Sep 17 00:00:00 2001
From: Tobias Ribizel <tribizel@nvidia.com>
Date: Mon, 10 Oct 2022 09:56:11 +0200
Subject: [PATCH 017/202] Add BGZIP multibyte_split benchmark (#11723)

This refactors #11652 to extract the BGZIP IO and adds another `source_type` to the `multibyte_split` benchmark, creating a compressed file using `zlib`.

A quick benchmark shows the BGZIP source to be around 2.5x slower than reading from a device buffer, at a compression ratio of around 1:5.

### [0] Tesla T4

| source_type | delim_size | delim_percent |    size_approx    | byte_range_percent | Time  | Peak Memory Usage | Encoded file size |
|-------------|------------|---------------|-------------------|--------------------|------------|-------------------|-------------------|
|           bgzip |          1 |             1 | 2^30 = 1073741824 |                100 |  507.479 ms |         4.022 GiB |      1006.638 MiB |
|           file |          1 |             1 | 2^30 = 1073741824 |                100 |  339.860 ms |    3.947 GiB |      1006.638 MiB |
|           device |          1 |             1 | 2^30 = 1073741824 |                100 | 201.556 ms |       3.947 GiB |      1006.638 MiB |

Authors:
  - Tobias Ribizel (https://github.com/upsj)

Approvers:
  - Robert Maynard (https://github.com/robertmaynard)
  - Vukasin Milovanovic (https://github.com/vuule)
  - Bradley Dice (https://github.com/bdice)
  - Jordan Jacobelli (https://github.com/Ethyling)

URL: https://github.com/rapidsai/cudf/pull/11723
---
 conda/recipes/libcudf/meta.yaml               |   1 +
 cpp/CMakeLists.txt                            |   1 +
 cpp/benchmarks/CMakeLists.txt                 |   3 +-
 cpp/benchmarks/io/text/multibyte_split.cpp    |  70 ++++--
 .../cudf/io/text/detail/bgzip_utils.hpp       | 112 +++++++++
 cpp/src/io/text/bgzip_data_chunk_source.cu    |  72 +-----
 cpp/src/io/text/bgzip_utils.cpp               | 179 ++++++++++++++
 cpp/tests/CMakeLists.txt                      |   1 +
 cpp/tests/io/text/data_chunk_source_test.cpp  | 219 +++++++++---------
 9 files changed, 473 insertions(+), 185 deletions(-)
 create mode 100644 cpp/include/cudf/io/text/detail/bgzip_utils.hpp
 create mode 100644 cpp/src/io/text/bgzip_utils.cpp

diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml
index a417b407044..ccb0d685062 100644
--- a/conda/recipes/libcudf/meta.yaml
+++ b/conda/recipes/libcudf/meta.yaml
@@ -152,6 +152,7 @@ outputs:
         - test -f $PREFIX/include/cudf/io/text/byte_range_info.hpp
         - test -f $PREFIX/include/cudf/io/text/data_chunk_source.hpp
         - test -f $PREFIX/include/cudf/io/text/data_chunk_source_factories.hpp
+        - test -f $PREFIX/include/cudf/io/text/detail/bgzip_utils.hpp
         - test -f $PREFIX/include/cudf/io/text/detail/multistate.hpp
         - test -f $PREFIX/include/cudf/io/text/detail/tile_state.hpp
         - test -f $PREFIX/include/cudf/io/text/detail/trie.hpp
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 60e914f07d3..8bde0bcfb9b 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -356,6 +356,7 @@ add_library(
   src/io/text/byte_range_info.cpp
   src/io/text/data_chunk_source_factories.cpp
   src/io/text/bgzip_data_chunk_source.cu
+  src/io/text/bgzip_utils.cpp
   src/io/text/multibyte_split.cu
   src/io/utilities/column_buffer.cpp
   src/io/utilities/config_utils.cpp
diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
index d1ff177a25e..f35d0b0b49e 100644
--- a/cpp/benchmarks/CMakeLists.txt
+++ b/cpp/benchmarks/CMakeLists.txt
@@ -301,7 +301,8 @@ ConfigureNVBench(NESTED_JSON_NVBENCH io/json/nested_json.cpp)
 
 # ##################################################################################################
 # * io benchmark ---------------------------------------------------------------------
-ConfigureNVBench(MULTIBYTE_SPLIT_BENCHMARK io/text/multibyte_split.cpp)
+ConfigureNVBench(MULTIBYTE_SPLIT_NVBENCH io/text/multibyte_split.cpp)
+target_link_libraries(MULTIBYTE_SPLIT_NVBENCH PRIVATE ZLIB::ZLIB)
 
 add_custom_target(
   run_benchmarks
diff --git a/cpp/benchmarks/io/text/multibyte_split.cpp b/cpp/benchmarks/io/text/multibyte_split.cpp
index 4865d11ae8b..b7e85d8aa7e 100644
--- a/cpp/benchmarks/io/text/multibyte_split.cpp
+++ b/cpp/benchmarks/io/text/multibyte_split.cpp
@@ -25,6 +25,7 @@
 #include <cudf/column/column_factories.hpp>
 #include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/io/text/data_chunk_source_factories.hpp>
+#include <cudf/io/text/detail/bgzip_utils.hpp>
 #include <cudf/io/text/multibyte_split.hpp>
 #include <cudf/scalar/scalar_factories.hpp>
 #include <cudf/strings/combine.hpp>
@@ -40,10 +41,25 @@
 #include <cstdio>
 #include <fstream>
 #include <memory>
+#include <random>
 
 temp_directory const temp_dir("cudf_nvbench");
 
-enum class data_chunk_source_type { device, file, host, host_pinned };
+enum class data_chunk_source_type { device, file, host, host_pinned, file_bgzip };
+
+NVBENCH_DECLARE_ENUM_TYPE_STRINGS(
+  data_chunk_source_type,
+  [](auto value) {
+    switch (value) {
+      case data_chunk_source_type::device: return "device";
+      case data_chunk_source_type::file: return "file";
+      case data_chunk_source_type::host: return "host";
+      case data_chunk_source_type::host_pinned: return "host_pinned";
+      case data_chunk_source_type::file_bgzip: return "file_bgzip";
+      default: return "Unknown";
+    }
+  },
+  [](auto) { return std::string{}; })
 
 static cudf::string_scalar create_random_input(int32_t num_chars,
                                                double delim_factor,
@@ -78,14 +94,32 @@ static cudf::string_scalar create_random_input(int32_t num_chars,
   return cudf::string_scalar(std::move(*chars_buffer));
 }
 
-static void bench_multibyte_split(nvbench::state& state)
+static void write_bgzip_file(cudf::host_span<char const> host_data, std::ostream& output_stream)
+{
+  // a bit of variability with a decent amount of padding so we don't overflow 16 bit block sizes
+  std::uniform_int_distribution<std::size_t> chunk_size_dist{64000, 65000};
+  std::default_random_engine rng{};
+  std::size_t pos = 0;
+  while (pos < host_data.size()) {
+    auto const remainder  = host_data.size() - pos;
+    auto const chunk_size = std::min(remainder, chunk_size_dist(rng));
+    cudf::io::text::detail::bgzip::write_compressed_block(output_stream,
+                                                          {host_data.data() + pos, chunk_size});
+    pos += chunk_size;
+  }
+  // empty block denotes EOF
+  cudf::io::text::detail::bgzip::write_uncompressed_block(output_stream, {});
+}
+
+template <data_chunk_source_type source_type>
+static void bench_multibyte_split(nvbench::state& state,
+                                  nvbench::type_list<nvbench::enum_type<source_type>>)
 {
   cudf::rmm_pool_raii pool_raii;
 
-  auto const source_type      = static_cast<data_chunk_source_type>(state.get_int64("source_type"));
-  auto const delim_size       = state.get_int64("delim_size");
-  auto const delim_percent    = state.get_int64("delim_percent");
-  auto const file_size_approx = state.get_int64("size_approx");
+  auto const delim_size         = state.get_int64("delim_size");
+  auto const delim_percent      = state.get_int64("delim_percent");
+  auto const file_size_approx   = state.get_int64("size_approx");
   auto const byte_range_percent = state.get_int64("byte_range_percent");
 
   auto const byte_range_factor = static_cast<double>(byte_range_percent) / 100;
@@ -104,7 +138,8 @@ static void bench_multibyte_split(nvbench::state& state)
   auto host_pinned_input =
     thrust::host_vector<char, thrust::system::cuda::experimental::pinned_allocator<char>>{};
 
-  if (source_type == data_chunk_source_type::host || source_type == data_chunk_source_type::file) {
+  if (source_type == data_chunk_source_type::host || source_type == data_chunk_source_type::file ||
+      source_type == data_chunk_source_type::file_bgzip) {
     host_input = cudf::detail::make_std_vector_sync<char>(
       {device_input.data(), static_cast<std::size_t>(device_input.size())},
       cudf::default_stream_value);
@@ -131,6 +166,14 @@ static void bench_multibyte_split(nvbench::state& state)
         return cudf::io::text::make_source(host_pinned_input);
       case data_chunk_source_type::device:  //
         return cudf::io::text::make_source(device_input);
+      case data_chunk_source_type::file_bgzip: {
+        auto const temp_file_name = random_file_in_dir(temp_dir.path());
+        {
+          std::ofstream output_stream(temp_file_name, std::ofstream::out);
+          write_bgzip_file(host_input, output_stream);
+        }
+        return cudf::io::text::make_source_from_bgzip_file(temp_file_name);
+      }
       default: CUDF_FAIL();
     }
   }();
@@ -152,13 +195,14 @@ static void bench_multibyte_split(nvbench::state& state)
   state.add_buffer_size(range_size, "efs", "Encoded file size");
 }
 
-NVBENCH_BENCH(bench_multibyte_split)
+using source_type_list = nvbench::enum_type_list<data_chunk_source_type::device,
+                                                 data_chunk_source_type::file,
+                                                 data_chunk_source_type::host,
+                                                 data_chunk_source_type::host_pinned,
+                                                 data_chunk_source_type::file_bgzip>;
+
+NVBENCH_BENCH_TYPES(bench_multibyte_split, NVBENCH_TYPE_AXES(source_type_list))
   .set_name("multibyte_split")
-  .add_int64_axis("source_type",
-                  {static_cast<int>(data_chunk_source_type::device),
-                   static_cast<int>(data_chunk_source_type::file),
-                   static_cast<int>(data_chunk_source_type::host),
-                   static_cast<int>(data_chunk_source_type::host_pinned)})
   .add_int64_axis("delim_size", {1, 4, 7})
   .add_int64_axis("delim_percent", {1, 25})
   .add_int64_power_of_two_axis("size_approx", {15, 30})
diff --git a/cpp/include/cudf/io/text/detail/bgzip_utils.hpp b/cpp/include/cudf/io/text/detail/bgzip_utils.hpp
new file mode 100644
index 00000000000..627df5f358a
--- /dev/null
+++ b/cpp/include/cudf/io/text/detail/bgzip_utils.hpp
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <zlib.h>
+
+#include <cudf/utilities/error.hpp>
+#include <cudf/utilities/span.hpp>
+
+#include <algorithm>
+#include <array>
+#include <fstream>
+#include <limits>
+
+namespace cudf::io::text::detail::bgzip {
+
+struct header {
+  int block_size;
+  int extra_length;
+  [[nodiscard]] int data_size() const { return block_size - extra_length - 20; }
+};
+
+struct footer {
+  uint32_t crc;
+  uint32_t decompressed_size;
+};
+
+/**
+ * @brief Reads the full BGZIP header from the given input stream. Afterwards, the stream position
+ *        is at the first data byte.
+ *
+ * @param input_stream The input stream
+ * @return The header storing the compressed size and extra subfield length
+ */
+header read_header(std::istream& input_stream);
+
+/**
+ * @brief Reads the full BGZIP footer from the given input stream. Afterwards, the stream position
+ *        is after the last footer byte.
+ *
+ * @param input_stream The input stream
+ * @return The footer storing uncompressed size and CRC32
+ */
+footer read_footer(std::istream& input_stream);
+
+/**
+ * @brief Writes a header for data of the given compressed size to the given stream.
+ *
+ * @param output_stream The output stream
+ * @param compressed_size The size of the compressed data
+ * @param pre_size_subfields Any GZIP extra subfields (need to be valid) to be placed before the
+ *                           BGZIP block size subfield
+ * @param post_size_subfields Any subfields to be placed after the BGZIP block size subfield
+ */
+void write_header(std::ostream& output_stream,
+                  uint16_t compressed_size,
+                  host_span<char const> pre_size_subfields,
+                  host_span<char const> post_size_subfields);
+
+/**
+ * @brief Writes a footer for the given uncompressed data to the given stream.
+ *
+ * @param output_stream The output stream
+ * @param data The data for which uncompressed size and CRC32 will be computed and written
+ */
+void write_footer(std::ostream& output_stream, host_span<char const> data);
+
+/**
+ * @brief Writes the given data to the given stream as an uncompressed deflate block with BGZIP
+ *        header and footer.
+ *
+ * @param output_stream The output stream
+ * @param data The uncompressed data
+ * @param pre_size_subfields Any GZIP extra subfields (need to be valid) to be placed before the
+ *                           BGZIP block size subfield
+ * @param post_size_subfields Any subfields to be placed after the BGZIP block size subfield
+ */
+void write_uncompressed_block(std::ostream& output_stream,
+                              host_span<char const> data,
+                              host_span<char const> pre_size_subfields  = {},
+                              host_span<char const> post_size_subfields = {});
+
+/**
+ * @brief Writes the given data to the given stream as a compressed deflate block with BGZIP
+ *        header and footer.
+ *
+ * @param output_stream The output stream
+ * @param data The uncompressed data
+ * @param pre_size_subfields Any GZIP extra subfields (need to be valid) to be placed before the
+ *                           BGZIP block size subfield
+ * @param post_size_subfields Any subfields to be placed after the BGZIP block size subfield
+ */
+void write_compressed_block(std::ostream& output_stream,
+                            host_span<char const> data,
+                            host_span<char const> pre_size_subfields  = {},
+                            host_span<char const> post_size_subfields = {});
+
+}  // namespace cudf::io::text::detail::bgzip
diff --git a/cpp/src/io/text/bgzip_data_chunk_source.cu b/cpp/src/io/text/bgzip_data_chunk_source.cu
index 7715c2ca7e1..9c4ff218783 100644
--- a/cpp/src/io/text/bgzip_data_chunk_source.cu
+++ b/cpp/src/io/text/bgzip_data_chunk_source.cu
@@ -20,6 +20,7 @@
 
 #include <cudf/detail/nvtx/ranges.hpp>
 #include <cudf/io/text/data_chunk_source_factories.hpp>
+#include <cudf/io/text/detail/bgzip_utils.hpp>
 #include <cudf/utilities/default_stream.hpp>
 #include <cudf/utilities/error.hpp>
 
@@ -36,7 +37,6 @@
 #include <limits>
 
 namespace cudf::io::text {
-
 namespace {
 
 /**
@@ -64,68 +64,6 @@ struct bgzip_nvcomp_transform_functor {
 
 class bgzip_data_chunk_reader : public data_chunk_reader {
  private:
-  template <typename IntType>
-  static IntType read_int(char* data)
-  {
-    IntType result{};
-    // we assume little-endian
-    std::memcpy(&result, &data[0], sizeof(result));
-    return result;
-  }
-
-  struct bgzip_header {
-    int block_size;
-    int extra_length;
-    [[nodiscard]] int data_size() const { return block_size - extra_length - 20; }
-  };
-
-  bgzip_header read_header()
-  {
-    std::array<char, 12> buffer{};
-    _data_stream->read(buffer.data(), sizeof(buffer));
-    std::array<uint8_t, 4> const expected_header{{31, 139, 8, 4}};
-    CUDF_EXPECTS(
-      std::equal(
-        expected_header.begin(), expected_header.end(), reinterpret_cast<uint8_t*>(buffer.data())),
-      "malformed BGZIP header");
-    // we ignore the remaining bytes of the fixed header, since they don't matter to us
-    auto const extra_length = read_int<uint16_t>(&buffer[10]);
-    uint16_t extra_offset{};
-    // read all the extra subfields
-    while (extra_offset < extra_length) {
-      auto const remaining_size = extra_length - extra_offset;
-      CUDF_EXPECTS(remaining_size >= 4, "invalid extra field length");
-      // a subfield consists of 2 identifier bytes and a uint16 length
-      // 66/67 identifies a BGZIP block size field, we skip all other fields
-      _data_stream->read(buffer.data(), 4);
-      extra_offset += 4;
-      auto const subfield_size = read_int<uint16_t>(&buffer[2]);
-      if (buffer[0] == 66 && buffer[1] == 67) {
-        // the block size subfield contains a single uint16 value, which is block_size - 1
-        CUDF_EXPECTS(subfield_size == sizeof(uint16_t), "malformed BGZIP extra subfield");
-        _data_stream->read(buffer.data(), sizeof(uint16_t));
-        _data_stream->seekg(remaining_size - 6, std::ios_base::cur);
-        auto const block_size_minus_one = read_int<uint16_t>(&buffer[0]);
-        return {block_size_minus_one + 1, extra_length};
-      } else {
-        _data_stream->seekg(subfield_size, std::ios_base::cur);
-        extra_offset += subfield_size;
-      }
-    }
-    CUDF_FAIL("missing BGZIP size extra subfield");
-  }
-
-  struct bgzip_footer {
-    uint32_t decompressed_size;
-  };
-
-  bgzip_footer read_footer()
-  {
-    std::array<char, 8> buffer{};
-    _data_stream->read(buffer.data(), sizeof(buffer));
-    return {read_int<uint32_t>(&buffer[4])};
-  }
-
   template <typename T>
   using pinned_host_vector =
     thrust::host_vector<T, thrust::system::cuda::experimental::pinned_allocator<T>>;
@@ -258,13 +196,13 @@ class bgzip_data_chunk_reader : public data_chunk_reader {
       return available_decompressed_size - read_pos;
     }
 
-    void read_block(bgzip_header header, std::istream& stream)
+    void read_block(detail::bgzip::header header, std::istream& stream)
     {
       h_compressed_blocks.resize(h_compressed_blocks.size() + header.data_size());
       stream.read(h_compressed_blocks.data() + compressed_size(), header.data_size());
     }
 
-    void add_block_offsets(bgzip_header header, bgzip_footer footer)
+    void add_block_offsets(detail::bgzip::header header, detail::bgzip::footer footer)
     {
       max_decompressed_size =
         std::max<std::size_t>(footer.decompressed_size, max_decompressed_size);
@@ -294,9 +232,9 @@ class bgzip_data_chunk_reader : public data_chunk_reader {
       // peek is necessary if we are already at the end, but didn't try to read another byte
       _data_stream->peek();
       if (_data_stream->eof() || _compressed_pos > _compressed_end) { break; }
-      auto header = read_header();
+      auto header = detail::bgzip::read_header(*_data_stream);
       _curr_blocks.read_block(header, *_data_stream);
-      auto footer = read_footer();
+      auto footer = detail::bgzip::read_footer(*_data_stream);
       _curr_blocks.add_block_offsets(header, footer);
       // for the last GZIP block, we restrict ourselves to the bytes up to _local_end
       // but only for the reader, not for decompression!
diff --git a/cpp/src/io/text/bgzip_utils.cpp b/cpp/src/io/text/bgzip_utils.cpp
new file mode 100644
index 00000000000..dd08387a6b5
--- /dev/null
+++ b/cpp/src/io/text/bgzip_utils.cpp
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <zlib.h>
+
+#include <cudf/io/text/detail/bgzip_utils.hpp>
+#include <cudf/utilities/error.hpp>
+#include <cudf/utilities/span.hpp>
+
+#include <algorithm>
+#include <array>
+#include <fstream>
+#include <limits>
+
+namespace cudf::io::text::detail::bgzip {
+namespace {
+
+template <typename IntType>
+IntType read_int(char* data)
+{
+  IntType result{};
+  // we assume little-endian
+  std::memcpy(&result, &data[0], sizeof(result));
+  return result;
+}
+
+template <typename T>
+void write_int(std::ostream& output_stream, T val)
+{
+  std::array<char, sizeof(T)> bytes;
+  // we assume little-endian
+  std::memcpy(&bytes[0], &val, sizeof(T));
+  output_stream.write(bytes.data(), bytes.size());
+}
+
+}  // namespace
+
+std::array<char, 4> constexpr extra_blocklen_field_header{{66, 67, 2, 0}};
+
+header read_header(std::istream& input_stream)
+{
+  std::array<char, 12> buffer{};
+  input_stream.read(buffer.data(), sizeof(buffer));
+  std::array<uint8_t, 4> constexpr expected_header{{31, 139, 8, 4}};
+  CUDF_EXPECTS(
+    std::equal(
+      expected_header.begin(), expected_header.end(), reinterpret_cast<uint8_t*>(buffer.data())),
+    "malformed BGZIP header");
+  // we ignore the remaining bytes of the fixed header, since they don't matter to us
+  auto const extra_length = read_int<uint16_t>(&buffer[10]);
+  uint16_t extra_offset{};
+  // read all the extra subfields
+  while (extra_offset < extra_length) {
+    auto const remaining_size = extra_length - extra_offset;
+    CUDF_EXPECTS(remaining_size >= 4, "invalid extra field length");
+    // a subfield consists of 2 identifier bytes and a uint16 length
+    // 66/67 identifies a BGZIP block size field, we skip all other fields
+    input_stream.read(buffer.data(), 4);
+    extra_offset += 4;
+    auto const subfield_size = read_int<uint16_t>(&buffer[2]);
+    if (buffer[0] == extra_blocklen_field_header[0] &&
+        buffer[1] == extra_blocklen_field_header[1]) {
+      // the block size subfield contains a single uint16 value, which is block_size - 1
+      CUDF_EXPECTS(
+        buffer[2] == extra_blocklen_field_header[2] && buffer[3] == extra_blocklen_field_header[3],
+        "malformed BGZIP extra subfield");
+      input_stream.read(buffer.data(), sizeof(uint16_t));
+      input_stream.seekg(remaining_size - 6, std::ios_base::cur);
+      auto const block_size_minus_one = read_int<uint16_t>(&buffer[0]);
+      return {block_size_minus_one + 1, extra_length};
+    } else {
+      input_stream.seekg(subfield_size, std::ios_base::cur);
+      extra_offset += subfield_size;
+    }
+  }
+  CUDF_FAIL("missing BGZIP size extra subfield");
+}
+
+footer read_footer(std::istream& input_stream)
+{
+  std::array<char, 8> buffer{};
+  input_stream.read(buffer.data(), sizeof(buffer));
+  return {read_int<uint32_t>(&buffer[0]), read_int<uint32_t>(&buffer[4])};
+}
+
+void write_footer(std::ostream& output_stream, host_span<char const> data)
+{
+  // compute crc32 with zlib, this allows checking the generated files with external tools
+  write_int<uint32_t>(output_stream, crc32(0, (unsigned char*)data.data(), data.size()));
+  write_int<uint32_t>(output_stream, data.size());
+}
+
+void write_header(std::ostream& output_stream,
+                  uint16_t compressed_size,
+                  host_span<char const> pre_size_subfield,
+                  host_span<char const> post_size_subfield)
+{
+  std::array<uint8_t, 10> constexpr header_data{{
+    31,   // magic number
+    139,  // magic number
+    8,    // compression type: deflate
+    4,    // flags: extra header
+    0,    // mtime
+    0,    // mtime
+    0,    // mtime
+    0,    // mtime: irrelevant
+    4,    // xfl: irrelevant
+    3     // OS: irrelevant
+  }};
+  output_stream.write(reinterpret_cast<const char*>(header_data.data()), header_data.size());
+  auto const extra_size = pre_size_subfield.size() + extra_blocklen_field_header.size() +
+                          sizeof(uint16_t) + post_size_subfield.size();
+  auto const block_size =
+    header_data.size() + sizeof(uint16_t) + extra_size + compressed_size + 2 * sizeof(uint32_t);
+  write_int<uint16_t>(output_stream, extra_size);
+  output_stream.write(pre_size_subfield.data(), pre_size_subfield.size());
+  output_stream.write(extra_blocklen_field_header.data(), extra_blocklen_field_header.size());
+  CUDF_EXPECTS(block_size - 1 <= std::numeric_limits<uint16_t>::max(), "block size overflow");
+  write_int<uint16_t>(output_stream, block_size - 1);
+  output_stream.write(post_size_subfield.data(), post_size_subfield.size());
+}
+
+void write_uncompressed_block(std::ostream& output_stream,
+                              host_span<char const> data,
+                              host_span<char const> pre_size_subfields,
+                              host_span<char const> post_size_subfields)
+{
+  CUDF_EXPECTS(data.size() <= std::numeric_limits<uint16_t>::max(), "data size overflow");
+  write_header(output_stream, data.size() + 5, pre_size_subfields, post_size_subfields);
+  write_int<uint8_t>(output_stream, 1);
+  write_int<uint16_t>(output_stream, data.size());
+  write_int<uint16_t>(output_stream, ~static_cast<uint16_t>(data.size()));
+  output_stream.write(data.data(), data.size());
+  write_footer(output_stream, data);
+}
+
+void write_compressed_block(std::ostream& output_stream,
+                            host_span<char const> data,
+                            host_span<char const> pre_size_subfields,
+                            host_span<char const> post_size_subfields)
+{
+  CUDF_EXPECTS(data.size() <= std::numeric_limits<uint16_t>::max(), "data size overflow");
+  z_stream deflate_stream{};
+  // let's make sure we have enough space to store the data
+  std::vector<char> compressed_out(data.size() * 2 + 256);
+  deflate_stream.next_in   = reinterpret_cast<unsigned char*>(const_cast<char*>(data.data()));
+  deflate_stream.avail_in  = data.size();
+  deflate_stream.next_out  = reinterpret_cast<unsigned char*>(compressed_out.data());
+  deflate_stream.avail_out = compressed_out.size();
+  CUDF_EXPECTS(
+    deflateInit2(&deflate_stream,        // stream
+                 Z_DEFAULT_COMPRESSION,  // compression level
+                 Z_DEFLATED,             // method
+                 -15,  // log2 of window size (negative value means no ZLIB header/footer)
+                 9,    // mem level: best performance/most memory usage for compression
+                 Z_DEFAULT_STRATEGY  // strategy
+                 ) == Z_OK,
+    "deflateInit failed");
+  CUDF_EXPECTS(deflate(&deflate_stream, Z_FINISH) == Z_STREAM_END, "deflate failed");
+  CUDF_EXPECTS(deflateEnd(&deflate_stream) == Z_OK, "deflateEnd failed");
+  write_header(output_stream, deflate_stream.total_out, pre_size_subfields, post_size_subfields);
+  output_stream.write(compressed_out.data(), deflate_stream.total_out);
+  write_footer(output_stream, data);
+}
+
+}  // namespace cudf::io::text::detail::bgzip
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index e630e842f4e..8675dc891c1 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -230,6 +230,7 @@ ConfigureTest(NESTED_JSON_TEST io/nested_json_test.cpp io/json_tree.cpp)
 ConfigureTest(ARROW_IO_SOURCE_TEST io/arrow_io_source_test.cpp)
 ConfigureTest(MULTIBYTE_SPLIT_TEST io/text/multibyte_split_test.cpp)
 ConfigureTest(DATA_CHUNK_SOURCE_TEST io/text/data_chunk_source_test.cpp)
+target_link_libraries(DATA_CHUNK_SOURCE_TEST PRIVATE ZLIB::ZLIB)
 ConfigureTest(LOGICAL_STACK_TEST io/fst/logical_stack_test.cu)
 ConfigureTest(FST_TEST io/fst/fst_test.cu)
 ConfigureTest(TYPE_INFERENCE_TEST io/type_inference_test.cu)
diff --git a/cpp/tests/io/text/data_chunk_source_test.cpp b/cpp/tests/io/text/data_chunk_source_test.cpp
index 115a66cdd95..7cb75aea8e2 100644
--- a/cpp/tests/io/text/data_chunk_source_test.cpp
+++ b/cpp/tests/io/text/data_chunk_source_test.cpp
@@ -18,6 +18,7 @@
 #include <cudf_test/cudf_gtest.hpp>
 
 #include <cudf/io/text/data_chunk_source_factories.hpp>
+#include <cudf/io/text/detail/bgzip_utils.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
 
@@ -125,102 +126,67 @@ TEST_F(DataChunkSourceTest, Host)
   test_source(content, *source);
 }
 
-template <typename T>
-void write_int(std::ostream& stream, T val)
-{
-  std::array<char, sizeof(T)> bytes;
-  // we assume little-endian
-  std::memcpy(&bytes[0], &val, sizeof(T));
-  stream.write(bytes.data(), bytes.size());
-}
+enum class compression { ENABLED, DISABLED };
 
-void write_bgzip_block(std::ostream& stream,
-                       const std::string& data,
-                       bool add_extra_garbage_before,
-                       bool add_extra_garbage_after)
-{
-  std::array<uint8_t, 10> const header{{
-    31,   // magic number
-    139,  // magic number
-    8,    // compression type: deflate
-    4,    // flags: extra header
-    0,    // mtime
-    0,    // mtime
-    0,    // mtime
-    0,    // mtime: irrelevant
-    4,    // xfl: irrelevant
-    3     // OS: irrelevant
-  }};
-  std::array<char, 4> const extra_blocklen_field{{66, 67, 2, 0}};
-  std::array<char, 11> const extra_garbage_field1{{13,  // magic number
-                                                   37,  // magic number
-                                                   7,   // field length
-                                                   0,   // field length
-                                                   1,
-                                                   2,
-                                                   3,
-                                                   4,
-                                                   5,
-                                                   6,
-                                                   7}};
-  std::array<char, 23> const extra_garbage_field2{{12,  // magic number
-                                                   34,  // magic number
-                                                   2,   // field length
-                                                   0,   // field length
-                                                   1,  2,
-                                                   56,  // magic number
-                                                   78,  // magic number
-                                                   1,   // field length
-                                                   0,   // field length
-                                                   3,   //
-                                                   90,  // magic number
-                                                   12,  // magic number
-                                                   8,   // field length
-                                                   0,   // field length
-                                                   1,  2, 3, 4, 5, 6, 7, 8}};
-  stream.write(reinterpret_cast<const char*>(header.data()), header.size());
-  uint16_t extra_size = extra_blocklen_field.size() + 2;
-  if (add_extra_garbage_before) { extra_size += extra_garbage_field1.size(); }
-  if (add_extra_garbage_after) { extra_size += extra_garbage_field2.size(); }
-  write_int(stream, extra_size);
-  if (add_extra_garbage_before) {
-    stream.write(extra_garbage_field1.data(), extra_garbage_field1.size());
-  }
-  stream.write(extra_blocklen_field.data(), extra_blocklen_field.size());
-  auto const compressed_size          = data.size() + 5;
-  uint16_t const block_size_minus_one = compressed_size + 19 + extra_size;
-  write_int(stream, block_size_minus_one);
-  if (add_extra_garbage_after) {
-    stream.write(extra_garbage_field2.data(), extra_garbage_field2.size());
-  }
-  write_int<uint8_t>(stream, 1);
-  write_int<uint16_t>(stream, data.size());
-  write_int<uint16_t>(stream, ~static_cast<uint16_t>(data.size()));
-  stream.write(data.data(), data.size());
-  // this does not produce a valid file, since we write 0 as the CRC
-  // the parser ignores the checksum, so it doesn't matter to the test
-  // to check output with gzip, plug in the CRC of `data` here.
-  write_int<uint32_t>(stream, 0);
-  write_int<uint32_t>(stream, data.size());
-}
+enum class eof { ADD_EOF_BLOCK, NO_EOF_BLOCK };
 
-void write_bgzip(std::ostream& stream,
-                 const std::string& data,
+void write_bgzip(std::ostream& output_stream,
+                 cudf::host_span<const char> data,
                  std::default_random_engine& rng,
-                 bool write_eof = true)
+                 compression compress,
+                 eof add_eof)
 {
+  std::vector<char> const extra_garbage_fields1{{13,  // magic number
+                                                 37,  // magic number
+                                                 7,   // field length
+                                                 0,   // field length
+                                                 1,
+                                                 2,
+                                                 3,
+                                                 4,
+                                                 5,
+                                                 6,
+                                                 7}};
+  std::vector<char> const extra_garbage_fields2{{12,  // magic number
+                                                 34,  // magic number
+                                                 2,   // field length
+                                                 0,   // field length
+                                                 1,  2,
+                                                 56,  // magic number
+                                                 78,  // magic number
+                                                 1,   // field length
+                                                 0,   // field length
+                                                 3,   //
+                                                 90,  // magic number
+                                                 12,  // magic number
+                                                 8,   // field length
+                                                 0,   // field length
+                                                 1,  2, 3, 4, 5, 6, 7, 8}};
   // make sure the block size with header stays below 65536
   std::uniform_int_distribution<std::size_t> block_size_dist{1, 65000};
   auto begin     = data.begin();
   auto const end = data.end();
   int i          = 0;
   while (begin < end) {
+    using cudf::host_span;
     auto len = std::min<std::size_t>(end - begin, block_size_dist(rng));
-    write_bgzip_block(stream, std::string{begin, begin + len}, i & 1, i & 2);
+    host_span<char const> const garbage_before =
+      i & 1 ? extra_garbage_fields1 : host_span<char const>{};
+    host_span<char const> const garbage_after =
+      i & 2 ? extra_garbage_fields2 : host_span<char const>{};
+    if (compress == compression::ENABLED) {
+      cudf::io::text::detail::bgzip::write_compressed_block(
+        output_stream, {begin, len}, garbage_before, garbage_after);
+    } else {
+      cudf::io::text::detail::bgzip::write_uncompressed_block(
+        output_stream, {begin, len}, garbage_before, garbage_after);
+    }
     begin += len;
     i++;
   }
-  if (write_eof) { write_bgzip_block(stream, {}, false, false); }
+  if (add_eof == eof::ADD_EOF_BLOCK) {
+    cudf::io::text::detail::bgzip::write_uncompressed_block(output_stream, {});
+  }
 }
 
 TEST_F(DataChunkSourceTest, BgzipSource)
@@ -231,9 +197,9 @@ TEST_F(DataChunkSourceTest, BgzipSource)
     input = input + input;
   }
   {
-    std::ofstream stream{filename};
+    std::ofstream output_stream{filename};
     std::default_random_engine rng{};
-    write_bgzip(stream, input, rng);
+    write_bgzip(output_stream, input, rng, compression::DISABLED, eof::ADD_EOF_BLOCK);
   }
 
   auto const source = cudf::io::text::make_source_from_bgzip_file(filename);
@@ -243,7 +209,7 @@ TEST_F(DataChunkSourceTest, BgzipSource)
 
 TEST_F(DataChunkSourceTest, BgzipSourceVirtualOffsets)
 {
-  auto const filename = temp_env->get_temp_filepath("bgzip_source");
+  auto const filename = temp_env->get_temp_filepath("bgzip_source_offsets");
   std::string input{"bananarama"};
   for (int i = 0; i < 24; i++) {
     input = input + input;
@@ -260,16 +226,18 @@ TEST_F(DataChunkSourceTest, BgzipSourceVirtualOffsets)
   std::size_t const begin_local_offset{data_garbage.size()};
   std::size_t const end_local_offset{endinput.size()};
   {
-    std::ofstream stream{filename};
-    stream.write(padding_garbage.data(), padding_garbage.size());
+    std::ofstream output_stream{filename};
+    output_stream.write(padding_garbage.data(), padding_garbage.size());
     std::default_random_engine rng{};
-    begin_compressed_offset = stream.tellp();
-    write_bgzip_block(stream, data_garbage + begininput, false, false);
-    write_bgzip(stream, input, rng, false);
-    end_compressed_offset = stream.tellp();
-    write_bgzip_block(stream, endinput + data_garbage + data_garbage, false, false);
-    write_bgzip_block(stream, {}, false, false);
-    stream.write(padding_garbage.data(), padding_garbage.size());
+    begin_compressed_offset = output_stream.tellp();
+    cudf::io::text::detail::bgzip::write_uncompressed_block(output_stream,
+                                                            data_garbage + begininput);
+    write_bgzip(output_stream, input, rng, compression::DISABLED, eof::NO_EOF_BLOCK);
+    end_compressed_offset = output_stream.tellp();
+    cudf::io::text::detail::bgzip::write_uncompressed_block(output_stream,
+                                                            endinput + data_garbage + data_garbage);
+    cudf::io::text::detail::bgzip::write_uncompressed_block(output_stream, {});
+    output_stream.write(padding_garbage.data(), padding_garbage.size());
   }
   input = begininput + input + endinput;
 
@@ -283,7 +251,7 @@ TEST_F(DataChunkSourceTest, BgzipSourceVirtualOffsets)
 
 TEST_F(DataChunkSourceTest, BgzipSourceVirtualOffsetsSingleGZipBlock)
 {
-  auto const filename = temp_env->get_temp_filepath("bgzip_source");
+  auto const filename = temp_env->get_temp_filepath("bgzip_source_offsets_single_block");
   std::string const input{"collection unit brings"};
   std::string const head_garbage{"garbage"};
   std::string const tail_garbage{"GARBAGE"};
@@ -292,9 +260,10 @@ TEST_F(DataChunkSourceTest, BgzipSourceVirtualOffsetsSingleGZipBlock)
   std::size_t const begin_local_offset{head_garbage.size()};
   std::size_t const end_local_offset{head_garbage.size() + input.size()};
   {
-    std::ofstream stream{filename};
-    write_bgzip_block(stream, head_garbage + input + tail_garbage, false, false);
-    write_bgzip_block(stream, {}, false, false);
+    std::ofstream output_stream{filename};
+    cudf::io::text::detail::bgzip::write_uncompressed_block(output_stream,
+                                                            head_garbage + input + tail_garbage);
+    cudf::io::text::detail::bgzip::write_uncompressed_block(output_stream, {});
   }
 
   auto const source =
@@ -307,7 +276,7 @@ TEST_F(DataChunkSourceTest, BgzipSourceVirtualOffsetsSingleGZipBlock)
 
 TEST_F(DataChunkSourceTest, BgzipSourceVirtualOffsetsSingleChunk)
 {
-  auto const filename = temp_env->get_temp_filepath("bgzip_source");
+  auto const filename = temp_env->get_temp_filepath("bgzip_source_offsets_single_chunk");
   std::string const input{"collection unit brings"};
   std::string const head_garbage{"garbage"};
   std::string const tail_garbage{"GARBAGE"};
@@ -316,11 +285,13 @@ TEST_F(DataChunkSourceTest, BgzipSourceVirtualOffsetsSingleChunk)
   std::size_t const begin_local_offset{head_garbage.size()};
   std::size_t const end_local_offset{input.size() - 10};
   {
-    std::ofstream stream{filename};
-    write_bgzip_block(stream, head_garbage + input.substr(0, 10), false, false);
-    end_compressed_offset = stream.tellp();
-    write_bgzip_block(stream, input.substr(10) + tail_garbage, false, false);
-    write_bgzip_block(stream, {}, false, false);
+    std::ofstream output_stream{filename};
+    cudf::io::text::detail::bgzip::write_uncompressed_block(output_stream,
+                                                            head_garbage + input.substr(0, 10));
+    end_compressed_offset = output_stream.tellp();
+    cudf::io::text::detail::bgzip::write_uncompressed_block(output_stream,
+                                                            input.substr(10) + tail_garbage);
+    cudf::io::text::detail::bgzip::write_uncompressed_block(output_stream, {});
   }
 
   auto const source =
@@ -331,4 +302,44 @@ TEST_F(DataChunkSourceTest, BgzipSourceVirtualOffsetsSingleChunk)
   test_source(input, *source);
 }
 
+TEST_F(DataChunkSourceTest, BgzipCompressedSourceVirtualOffsets)
+{
+  auto const filename = temp_env->get_temp_filepath("bgzip_source_compressed_offsets");
+  std::string input{"bananarama"};
+  for (int i = 0; i < 24; i++) {
+    input = input + input;
+  }
+  std::string padding_garbage{"garbage"};
+  for (int i = 0; i < 10; i++) {
+    padding_garbage = padding_garbage + padding_garbage;
+  }
+  std::string const data_garbage{"GARBAGE"};
+  std::string const begininput{"begin of bananarama"};
+  std::string const endinput{"end of bananarama"};
+  std::size_t begin_compressed_offset{};
+  std::size_t end_compressed_offset{};
+  std::size_t const begin_local_offset{data_garbage.size()};
+  std::size_t const end_local_offset{endinput.size()};
+  {
+    std::ofstream output_stream{filename};
+    output_stream.write(padding_garbage.data(), padding_garbage.size());
+    std::default_random_engine rng{};
+    begin_compressed_offset = output_stream.tellp();
+    cudf::io::text::detail::bgzip::write_compressed_block(output_stream, data_garbage + begininput);
+    write_bgzip(output_stream, input, rng, compression::ENABLED, eof::NO_EOF_BLOCK);
+    end_compressed_offset = output_stream.tellp();
+    cudf::io::text::detail::bgzip::write_compressed_block(output_stream,
+                                                          endinput + data_garbage + data_garbage);
+    cudf::io::text::detail::bgzip::write_uncompressed_block(output_stream, {});
+    output_stream.write(padding_garbage.data(), padding_garbage.size());
+  }
+  input = begininput + input + endinput;
+
+  auto source =
+    cudf::io::text::make_source_from_bgzip_file(filename,
+                                                begin_compressed_offset << 16 | begin_local_offset,
+                                                end_compressed_offset << 16 | end_local_offset);
+  test_source(input, *source);
+}
+
 CUDF_TEST_PROGRAM_MAIN()

From 586907bf5dc6c897229ed365bdc49a5908cebfe3 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Mon, 10 Oct 2022 15:59:28 -0500
Subject: [PATCH 018/202] Fix pre-commit copyright check (#11860)

This PR improves the copyright check script to handle cases where the ancestor `branch-*` does not have an upstream set.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Jake Awe (https://github.com/AyodeAwe)

URL: https://github.com/rapidsai/cudf/pull/11860
---
 ci/checks/copyright.py | 34 +++++++++++++++++++++++++++-------
 1 file changed, 27 insertions(+), 7 deletions(-)

diff --git a/ci/checks/copyright.py b/ci/checks/copyright.py
index 61e30d7922e..83f43183f71 100644
--- a/ci/checks/copyright.py
+++ b/ci/checks/copyright.py
@@ -68,20 +68,40 @@ def modifiedFiles():
     we can read only the staged changes.
     """
     repo = git.Repo()
-    # TARGET_BRANCH is defined in CI
+    # Use the environment variable TARGET_BRANCH (defined in CI) if possible
     target_branch = os.environ.get("TARGET_BRANCH")
     if target_branch is None:
         # Fall back to the closest branch if not on CI
         target_branch = repo.git.describe(
             all=True, tags=True, match="branch-*", abbrev=0
         ).lstrip("heads/")
-    try:
-        # Use the tracking branch of the local reference if it exists
+
+    upstream_target_branch = None
+    if target_branch in repo.heads:
+        # Use the tracking branch of the local reference if it exists. This
+        # returns None if no tracking branch is set.
         upstream_target_branch = repo.heads[target_branch].tracking_branch()
-    except IndexError:
-        # Fall back to the remote reference (this happens on CI because the
-        # only local branch reference is current-pr-branch)
-        upstream_target_branch = repo.remote().refs[target_branch]
+    if upstream_target_branch is None:
+        # Fall back to the remote with the newest target_branch. This code
+        # path is used on CI because the only local branch reference is
+        # current-pr-branch, and thus target_branch is not in repo.heads.
+        # This also happens if no tracking branch is defined for the local
+        # target_branch. We use the remote with the latest commit if
+        # multiple remotes are defined.
+        candidate_branches = [
+            remote.refs[target_branch] for remote in repo.remotes
+            if target_branch in remote.refs
+        ]
+        if len(candidate_branches) > 0:
+            upstream_target_branch = sorted(
+                candidate_branches,
+                key=lambda branch: branch.commit.committed_datetime,
+            )[-1]
+        else:
+            # If no remotes are defined, try to use the local version of the
+            # target_branch. If this fails, the repo configuration must be very
+            # strange and we can fix this script on a case-by-case basis.
+            upstream_target_branch = repo.heads[target_branch]
     merge_base = repo.merge_base("HEAD", upstream_target_branch.commit)[0]
     diff = merge_base.diff()
     changed_files = {f for f in diff if f.b_path is not None}

From 5b51591e27d6480292f9e2602d8175b3276868b1 Mon Sep 17 00:00:00 2001
From: Vukasin Milovanovic <vmilovanovic@nvidia.com>
Date: Mon, 10 Oct 2022 15:34:09 -0700
Subject: [PATCH 019/202] Remove "experimental" warning for struct columns in
 ORC reader and writer (#11880)

Closes https://github.com/rapidsai/cudf/issues/11484

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Bradley Dice (https://github.com/bdice)
  - https://github.com/nvdbaranec

URL: https://github.com/rapidsai/cudf/pull/11880
---
 cpp/include/cudf/io/orc.hpp        | 6 ------
 python/cudf/cudf/io/orc.py         | 5 -----
 python/cudf/cudf/tests/test_orc.py | 3 ---
 python/cudf/cudf/utils/ioutils.py  | 7 -------
 4 files changed, 21 deletions(-)

diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp
index 7f3cb95e4b2..b1e2197a868 100644
--- a/cpp/include/cudf/io/orc.hpp
+++ b/cpp/include/cudf/io/orc.hpp
@@ -378,9 +378,6 @@ class orc_reader_options_builder {
  *  auto result  = cudf::io::read_orc(options);
  * @endcode
  *
- * Note: Support for reading files with struct columns is currently experimental, the output may not
- * be as reliable as reading for other datatypes.
- *
  * @param options Settings for controlling reading behavior
  * @param mr Device memory resource used to allocate device memory of the table in the returned
  * table_with_metadata.
@@ -783,9 +780,6 @@ class orc_writer_options_builder {
  *  cudf::io::write_orc(options);
  * @endcode
  *
- * Note: Support for writing tables with struct columns is currently experimental, the output may
- * not be as reliable as writing for other datatypes.
- *
  * @param options Settings for controlling reading behavior
  * @param mr Device memory resource to use for device memory allocation
  */
diff --git a/python/cudf/cudf/io/orc.py b/python/cudf/cudf/io/orc.py
index 718b9c4144f..b9ce07466e5 100644
--- a/python/cudf/cudf/io/orc.py
+++ b/python/cudf/cudf/io/orc.py
@@ -418,11 +418,6 @@ def to_orc(
     """{docstring}"""
 
     for col in df._data.columns:
-        if isinstance(col, cudf.core.column.StructColumn):
-            warnings.warn(
-                "Support for writing tables with struct columns is "
-                "currently experimental."
-            )
         if isinstance(col, cudf.core.column.CategoricalColumn):
             raise NotImplementedError(
                 "Writing to ORC format is not yet supported with "
diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py
index 422c2588eb0..5aa049db31a 100644
--- a/python/cudf/cudf/tests/test_orc.py
+++ b/python/cudf/cudf/tests/test_orc.py
@@ -1555,7 +1555,6 @@ def test_names_in_struct_dtype_nesting(datadir):
     assert edf.dtypes.equals(got.dtypes)
 
 
-@pytest.mark.filterwarnings("ignore:.*struct.*experimental")
 def test_writer_lists_structs(list_struct_buff):
     df_in = cudf.read_orc(list_struct_buff)
 
@@ -1567,7 +1566,6 @@ def test_writer_lists_structs(list_struct_buff):
     assert pyarrow_tbl.equals(df_in.to_arrow())
 
 
-@pytest.mark.filterwarnings("ignore:.*struct.*experimental")
 @pytest.mark.parametrize(
     "data",
     [
@@ -1668,7 +1666,6 @@ def test_empty_statistics():
         assert stats[0]["i"].get("sum") == 1
 
 
-@pytest.mark.filterwarnings("ignore:.*struct.*experimental")
 @pytest.mark.parametrize(
     "equivalent_columns",
     [
diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py
index 8bb246c9c84..366b2e0ebae 100644
--- a/python/cudf/cudf/utils/ioutils.py
+++ b/python/cudf/cudf/utils/ioutils.py
@@ -301,8 +301,6 @@
 
 Notes
 -----
-Support for reading files with struct columns is currently experimental,
-the output may not be as reliable as reading for other datatypes.
 {remote_data_sources}
 
 Examples
@@ -447,11 +445,6 @@
     Note that this option only affects columns of ListDtype. Names of other
     column types will be ignored.
 
-Notes
------
-Support for writing tables with struct columns is currently experimental,
-the output may not be as reliable as writing for other datatypes.
-
 See Also
 --------
 cudf.read_orc

From 26f3e76ceddda6d3517f46825e3a8bc2fce0381f Mon Sep 17 00:00:00 2001
From: Liangcai Li <firestarmanllc@gmail.com>
Date: Tue, 11 Oct 2022 09:06:14 +0800
Subject: [PATCH 020/202] ArrowIPCTableWriter writes an empty batch in the case
 of an empty table. (#11883)

closes https://github.com/rapidsai/cudf/issues/11882

Updated the `ArrowIPCTableWriter` to explicitly write an empty batch when the table is empty. The Arrow IPC writer writes no batches at all in this case, which leads to the error below when calling `pyarrow.Table.from_batches` without specifying a schema.
```
E                     File "pyarrow/table.pxi", line 1609, in pyarrow.lib.Table.from_batches
E                   ValueError: Must pass schema, or at least one RecordBatch
```

Signed-off-by: Liangcai Li <firestarmanllc@gmail.com>

Authors:
  - Liangcai Li (https://github.com/firestarman)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/11883
---
 java/src/main/native/src/TableJni.cpp         | 10 ++++++-
 .../test/java/ai/rapids/cudf/TableTest.java   | 29 +++++++++++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp
index ad280cad5fd..c23c5a3ccb2 100644
--- a/java/src/main/native/src/TableJni.cpp
+++ b/java/src/main/native/src/TableJni.cpp
@@ -258,7 +258,15 @@ class native_arrow_ipc_writer_handle final {
       writer = *tmp_writer;
       initialized = true;
     }
-    writer->WriteTable(*arrow_tab, max_chunk);
+    if (arrow_tab->num_rows() == 0) {
+      // Arrow C++ IPC writer will not write an empty batch in the case of an
+      // empty table, so need to write an empty batch explicitly.
+      // For more please see https://issues.apache.org/jira/browse/ARROW-17912.
+      auto empty_batch = arrow::RecordBatch::MakeEmpty(arrow_tab->schema());
+      writer->WriteRecordBatch(*(*empty_batch));
+    } else {
+      writer->WriteTable(*arrow_tab, max_chunk);
+    }
   }
 
   void close() {
diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java
index 194c1094caf..4649a0e3507 100644
--- a/java/src/test/java/ai/rapids/cudf/TableTest.java
+++ b/java/src/test/java/ai/rapids/cudf/TableTest.java
@@ -7937,6 +7937,35 @@ void testArrowIPCWriteToBufferChunked() {
     }
   }
 
+  @Test
+  void testArrowIPCWriteEmptyToBufferChunked() {
+    try (Table emptyTable = new Table.TestBuilder().timestampDayColumn().build();
+         MyBufferConsumer consumer = new MyBufferConsumer()) {
+      ArrowIPCWriterOptions options = ArrowIPCWriterOptions.builder()
+              .withColumnNames("day")
+              .build();
+      try (TableWriter writer = Table.writeArrowIPCChunked(options, consumer)) {
+        writer.write(emptyTable);
+      }
+      try (StreamedTableReader reader = Table.readArrowIPCChunked(new MyBufferProvider(consumer))) {
+        boolean done = false;
+        int count = 0;
+        while (!done) {
+          try (Table t = reader.getNextIfAvailable()) {
+            if (t == null) {
+              done = true;
+            } else {
+              assertTablesAreEqual(emptyTable, t);
+              count++;
+            }
+          }
+        }
+        // Expect one empty batch for the empty table.
+        assertEquals(1, count);
+      }
+    }
+  }
+
   @Test
   void testORCWriteToBufferChunked() {
     String[] selectedColumns = WriteUtils.getAllColumns(false);

From 566b3d105bf58bcd6050a539fabb022782e050ab Mon Sep 17 00:00:00 2001
From: Gregory Kimball <gregory.kimball@sunpowercorp.com>
Date: Tue, 11 Oct 2022 08:17:08 -0700
Subject: [PATCH 021/202] Conform "bench_isin" to match generator column names
 (#11549)

The version of `bench_isin` merged in #11125 used key and column names of the format `f"key{i}"` rather than the format `f"{string.ascii_lowercase[i]}"` as is used in the dataframe generator. As a result the `isin` benchmark using a dictionary argument short-circuits with no matching keys, and the `isin` benchmark using a dataframe argument finds no matches.

This PR also adjusts the `isin` arguments from `range(1000)` to `range(50)` to better match the input dataframe cardinality of 100. With `range(1000)`, every element matches but with `range(50)` only 50% of the elements match.

Authors:
  - Gregory Kimball (https://github.com/GregoryKimball)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/11549
---
 python/cudf/benchmarks/API/bench_dataframe.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/python/cudf/benchmarks/API/bench_dataframe.py b/python/cudf/benchmarks/API/bench_dataframe.py
index 9bad637f6ae..42bfa854396 100644
--- a/python/cudf/benchmarks/API/bench_dataframe.py
+++ b/python/cudf/benchmarks/API/bench_dataframe.py
@@ -41,14 +41,16 @@ def bench_merge(benchmark, dataframe, num_key_cols):
 @pytest.mark.parametrize(
     "values",
     [
-        range(1000),
-        {f"key{i}": range(1000) for i in range(10)},
-        cudf.DataFrame({f"key{i}": range(1000) for i in range(10)}),
-        cudf.Series(range(1000)),
+        lambda: range(50),
+        lambda: {f"{string.ascii_lowercase[i]}": range(50) for i in range(10)},
+        lambda: cudf.DataFrame(
+            {f"{string.ascii_lowercase[i]}": range(50) for i in range(10)}
+        ),
+        lambda: cudf.Series(range(50)),
     ],
 )
 def bench_isin(benchmark, dataframe, values):
-    benchmark(dataframe.isin, values)
+    benchmark(dataframe.isin, values())
 
 
 @pytest.fixture(

From 9ba6142f6eb6a7d8f9903a5dfaf9af22cdd76b8a Mon Sep 17 00:00:00 2001
From: Gregory Kimball <gregory.kimball@sunpowercorp.com>
Date: Tue, 11 Oct 2022 08:18:10 -0700
Subject: [PATCH 022/202] Use public APIs in STREAM_COMPACTION_NVBENCH (#11892)

Use `state.set_cuda_stream` to set the stream for the nvbench benchmark. Then run `state.exec` on the public API instead of the detail API, e.g. `cudf::distinct` instead of `cudf::detail::distinct`.

Authors:
  - Gregory Kimball (https://github.com/GregoryKimball)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Karthikeyan (https://github.com/karthikeyann)

URL: https://github.com/rapidsai/cudf/pull/11892
---
 cpp/benchmarks/stream_compaction/distinct.cpp | 28 +++++++++----------
 cpp/benchmarks/stream_compaction/unique.cpp   |  7 ++---
 2 files changed, 16 insertions(+), 19 deletions(-)

diff --git a/cpp/benchmarks/stream_compaction/distinct.cpp b/cpp/benchmarks/stream_compaction/distinct.cpp
index ad837bc4caa..23960b24b89 100644
--- a/cpp/benchmarks/stream_compaction/distinct.cpp
+++ b/cpp/benchmarks/stream_compaction/distinct.cpp
@@ -18,8 +18,8 @@
 #include <benchmarks/fixture/rmm_pool_raii.hpp>
 
 #include <cudf/column/column_view.hpp>
-#include <cudf/detail/stream_compaction.hpp>
 #include <cudf/lists/list_view.hpp>
+#include <cudf/stream_compaction.hpp>
 #include <cudf/types.hpp>
 
 #include <nvbench/nvbench.cuh>
@@ -41,14 +41,13 @@ void nvbench_distinct(nvbench::state& state, nvbench::type_list<Type>)
   auto input_column = source_column->view();
   auto input_table  = cudf::table_view({input_column, input_column, input_column, input_column});
 
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
   state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
-    rmm::cuda_stream_view stream_view{launch.get_stream()};
-    auto result = cudf::detail::distinct(input_table,
-                                         {0},
-                                         cudf::duplicate_keep_option::KEEP_ANY,
-                                         cudf::null_equality::EQUAL,
-                                         cudf::nan_equality::ALL_EQUAL,
-                                         stream_view);
+    auto result = cudf::distinct(input_table,
+                                 {0},
+                                 cudf::duplicate_keep_option::KEEP_ANY,
+                                 cudf::null_equality::EQUAL,
+                                 cudf::nan_equality::ALL_EQUAL);
   });
 }
 
@@ -84,14 +83,13 @@ void nvbench_distinct_list(nvbench::state& state, nvbench::type_list<Type>)
   auto const table = create_random_table(
     {dtype}, table_size_bytes{static_cast<size_t>(size)}, data_profile{builder}, 0);
 
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
   state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
-    rmm::cuda_stream_view stream_view{launch.get_stream()};
-    auto result = cudf::detail::distinct(*table,
-                                         {0},
-                                         cudf::duplicate_keep_option::KEEP_ANY,
-                                         cudf::null_equality::EQUAL,
-                                         cudf::nan_equality::ALL_EQUAL,
-                                         stream_view);
+    auto result = cudf::distinct(*table,
+                                 {0},
+                                 cudf::duplicate_keep_option::KEEP_ANY,
+                                 cudf::null_equality::EQUAL,
+                                 cudf::nan_equality::ALL_EQUAL);
   });
 }
 
diff --git a/cpp/benchmarks/stream_compaction/unique.cpp b/cpp/benchmarks/stream_compaction/unique.cpp
index 6b586581408..bcf9628b19f 100644
--- a/cpp/benchmarks/stream_compaction/unique.cpp
+++ b/cpp/benchmarks/stream_compaction/unique.cpp
@@ -18,7 +18,7 @@
 #include <benchmarks/fixture/rmm_pool_raii.hpp>
 
 #include <cudf/column/column_view.hpp>
-#include <cudf/detail/stream_compaction.hpp>
+#include <cudf/stream_compaction.hpp>
 #include <cudf/types.hpp>
 
 #include <nvbench/nvbench.cuh>
@@ -62,10 +62,9 @@ void nvbench_unique(nvbench::state& state, nvbench::type_list<Type, nvbench::enu
   auto input_column = source_column->view();
   auto input_table  = cudf::table_view({input_column, input_column, input_column, input_column});
 
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
   state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
-    rmm::cuda_stream_view stream_view{launch.get_stream()};
-    auto result =
-      cudf::detail::unique(input_table, {0}, Keep, cudf::null_equality::EQUAL, stream_view);
+    auto result = cudf::unique(input_table, {0}, Keep, cudf::null_equality::EQUAL);
   });
 }
 

From a921f5daf6a3753e04bd01c408a5a1de5b208589 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Tue, 11 Oct 2022 12:54:44 -0500
Subject: [PATCH 023/202] Error on `ListColumn` or any new unsupported column
 in `cudf.Index` (#11902)

This PR raises a `NotImplementedError` for `ListColumn` or any new column that isn't supported by `cudf.Index` yet.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Ashwin Srinath (https://github.com/shwina)

URL: https://github.com/rapidsai/cudf/pull/11902
---
 python/cudf/cudf/core/index.py       |  5 +++++
 python/cudf/cudf/tests/test_index.py | 12 ++++++++++++
 2 files changed, 17 insertions(+)

diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 3d77ed15027..5b101f74664 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -118,6 +118,11 @@ def _index_from_data(data: MutableMapping, name: Any = None):
             index_class_type = CategoricalIndex
         elif isinstance(values, (IntervalColumn, StructColumn)):
             index_class_type = IntervalIndex
+        else:
+            raise NotImplementedError(
+                "Unsupported column type passed to "
+                f"create an Index: {type(values)}"
+            )
     else:
         index_class_type = cudf.MultiIndex
     return index_class_type._from_data(data, name)
diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py
index 358d5e2170e..894c87add4b 100644
--- a/python/cudf/cudf/tests/test_index.py
+++ b/python/cudf/cudf/tests/test_index.py
@@ -2783,3 +2783,15 @@ def test_index_null_values():
     gidx = cudf.Index([1.0, None, 3, 0, None])
     with pytest.raises(ValueError):
         gidx.values
+
+
+def test_index_error_list_index():
+    s = cudf.Series([[1, 2], [2], [4]])
+    with pytest.raises(
+        NotImplementedError,
+        match=re.escape(
+            "Unsupported column type passed to create an "
+            "Index: <class 'cudf.core.column.lists.ListColumn'>"
+        ),
+    ):
+        cudf.Index(s)

From 7032cc3c073f5d8842765adc4dc32883c943ef2b Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 11 Oct 2022 11:12:05 -0700
Subject: [PATCH 024/202] Add coverage for string UDF tests. (#11891)

Many PRs are currently showing Codecov patch status check failures that appear to be the result of not uploading coverage reports for the string UDF tests. This PR should enable the missing coverage and ensure that we are actually measuring coverage of these code paths.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - https://github.com/brandon-b-miller
  - Jake Awe (https://github.com/AyodeAwe)

URL: https://github.com/rapidsai/cudf/pull/11891
---
 ci/gpu/build.sh | 2 +-
 codecov.yml     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh
index afcc80a6803..41dac0e5e0f 100755
--- a/ci/gpu/build.sh
+++ b/ci/gpu/build.sh
@@ -294,7 +294,7 @@ elif [ ${STRINGS_UDF_PYTEST_RETCODE} -ne 0 ]; then
 else
     cd "$WORKSPACE/python/cudf/cudf"
     gpuci_logger "Python py.test retest cuDF UDFs"
-    py.test tests/test_udf_masked_ops.py -n 8 --cache-clear
+    py.test -n 8 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-strings-udf-tmp" --ignore="$WORKSPACE/python/cudf/cudf/benchmarks" --junitxml="$WORKSPACE/junit-cudf-strings-udf.xml" -v --cov-config="$WORKSPACE/python/cudf/.coveragerc" --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-strings-udf-coverage.xml" --cov-report term --dist=loadscope tests
 fi
 
 # Run benchmarks with both cudf and pandas to ensure compatibility is maintained.
diff --git a/codecov.yml b/codecov.yml
index f9d0f906807..d45c7e2990f 100644
--- a/codecov.yml
+++ b/codecov.yml
@@ -2,7 +2,7 @@
 coverage:
   status:
     project: off
-    patch: on
+    patch:
       default:
         target: auto
         threshold: 0%

From 387192c2b3659c872b27497aef033a5abcdb5444 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <3190405+shwina@users.noreply.github.com>
Date: Tue, 11 Oct 2022 16:04:42 -0400
Subject: [PATCH 025/202] Add ngroup (#11871)

Adds the `GroupBy.ngroup()` method. Closes #11848

Authors:
  - Ashwin Srinath (https://github.com/shwina)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/11871
---
 docs/cudf/source/api_docs/groupby.rst    |   1 +
 python/cudf/cudf/_lib/cpp/null_mask.pxd  |  22 +++--
 python/cudf/cudf/_lib/groupby.pyx        |  25 ++++-
 python/cudf/cudf/_lib/null_mask.pyx      |  29 ++++++
 python/cudf/cudf/core/groupby/groupby.py | 114 +++++++++++++++++++++--
 python/cudf/cudf/tests/test_groupby.py   |  33 +++++++
 6 files changed, 211 insertions(+), 13 deletions(-)

diff --git a/docs/cudf/source/api_docs/groupby.rst b/docs/cudf/source/api_docs/groupby.rst
index 141e5adba93..f36951749fb 100644
--- a/docs/cudf/source/api_docs/groupby.rst
+++ b/docs/cudf/source/api_docs/groupby.rst
@@ -53,6 +53,7 @@ Computations / descriptive stats
    GroupBy.mean
    GroupBy.median
    GroupBy.min
+   GroupBy.ngroup
    GroupBy.nth
    GroupBy.pad
    GroupBy.prod
diff --git a/python/cudf/cudf/_lib/cpp/null_mask.pxd b/python/cudf/cudf/_lib/cpp/null_mask.pxd
index c225a16297b..3050a9f3459 100644
--- a/python/cudf/cudf/_lib/cpp/null_mask.pxd
+++ b/python/cudf/cudf/_lib/cpp/null_mask.pxd
@@ -1,11 +1,13 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libc.stdint cimport int32_t
+from libcpp.pair cimport pair
 
 from rmm._lib.device_buffer cimport device_buffer
 
-cimport cudf._lib.cpp.types as libcudf_types
 from cudf._lib.cpp.column.column_view cimport column_view
+from cudf._lib.cpp.table.table_view cimport table_view
+from cudf._lib.cpp.types cimport mask_state, size_type
 
 ctypedef int32_t underlying_type_t_mask_state
 
@@ -16,15 +18,23 @@ cdef extern from "cudf/null_mask.hpp" namespace "cudf" nogil:
     ) except +
 
     cdef size_t bitmask_allocation_size_bytes (
-        libcudf_types.size_type number_of_bits,
+        size_type number_of_bits,
         size_t padding_boundary
     ) except +
 
     cdef size_t bitmask_allocation_size_bytes (
-        libcudf_types.size_type number_of_bits
+        size_type number_of_bits
     ) except +
 
     cdef device_buffer create_null_mask (
-        libcudf_types.size_type size,
-        libcudf_types.mask_state state
+        size_type size,
+        mask_state state
     ) except +
+
+    cdef pair[device_buffer, size_type] bitmask_and(
+        table_view view
+    ) except +
+
+    cdef pair[device_buffer, size_type] bitmask_or(
+        table_view view
+    ) except +
diff --git a/python/cudf/cudf/_lib/groupby.pyx b/python/cudf/cudf/_lib/groupby.pyx
index be5bb2741b4..08a1d74f80f 100644
--- a/python/cudf/cudf/_lib/groupby.pyx
+++ b/python/cudf/cudf/_lib/groupby.pyx
@@ -121,13 +121,36 @@ cdef class GroupBy:
         self.dropna = dropna
 
     def groups(self, list values):
+        """
+        Perform a sort groupby, using ``self.keys`` as the key columns
+        and ``values`` as the value columns.
+
+        Parameters
+        ----------
+        values: list of Columns
+            The value columns
+
+        Returns
+        -------
+        grouped_keys: list of Columns
+            The grouped key columns
+        grouped_values: list of Columns
+            The grouped value columns
+        offsets: list of integers
+            Integer offsets such that offsets[i+1] - offsets[i]
+            represents the size of group `i`.
+        """
         cdef table_view values_view = table_view_from_columns(values)
 
         with nogil:
             c_groups = move(self.c_obj.get()[0].get_groups(values_view))
 
         grouped_key_cols = columns_from_unique_ptr(move(c_groups.keys))
-        grouped_value_cols = columns_from_unique_ptr(move(c_groups.values))
+
+        if values:
+            grouped_value_cols = columns_from_unique_ptr(move(c_groups.values))
+        else:
+            grouped_value_cols = []
         return grouped_key_cols, grouped_value_cols, c_groups.offsets
 
     def aggregate_internal(self, values, aggregations):
diff --git a/python/cudf/cudf/_lib/null_mask.pyx b/python/cudf/cudf/_lib/null_mask.pyx
index b0ee28baf29..976fe0e78fc 100644
--- a/python/cudf/cudf/_lib/null_mask.pyx
+++ b/python/cudf/cudf/_lib/null_mask.pyx
@@ -3,6 +3,7 @@
 from enum import Enum
 
 from libcpp.memory cimport make_unique, unique_ptr
+from libcpp.pair cimport pair
 from libcpp.utility cimport move
 
 from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer
@@ -11,11 +12,15 @@ from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.null_mask cimport (
     bitmask_allocation_size_bytes as cpp_bitmask_allocation_size_bytes,
+    bitmask_and as cpp_bitmask_and,
+    bitmask_or as cpp_bitmask_or,
     copy_bitmask as cpp_copy_bitmask,
     create_null_mask as cpp_create_null_mask,
     underlying_type_t_mask_state,
 )
+from cudf._lib.cpp.table.table_view cimport table_view
 from cudf._lib.cpp.types cimport mask_state, size_type
+from cudf._lib.utils cimport table_view_from_columns
 
 from cudf.core.buffer import as_device_buffer_like
 
@@ -95,3 +100,27 @@ def create_null_mask(size_type size, state=MaskState.UNINITIALIZED):
     rmm_db = DeviceBuffer.c_from_unique_ptr(move(up_db))
     buf = as_device_buffer_like(rmm_db)
     return buf
+
+
+def bitmask_and(columns: list):
+    cdef table_view c_view = table_view_from_columns(columns)
+    cdef pair[device_buffer, size_type] c_result
+    cdef unique_ptr[device_buffer] up_db
+    with nogil:
+        c_result = move(cpp_bitmask_and(c_view))
+        up_db = make_unique[device_buffer](move(c_result.first))
+    dbuf = DeviceBuffer.c_from_unique_ptr(move(up_db))
+    buf = as_device_buffer_like(dbuf)
+    return buf, c_result.second
+
+
+def bitmask_or(columns: list):
+    cdef table_view c_view = table_view_from_columns(columns)
+    cdef pair[device_buffer, size_type] c_result
+    cdef unique_ptr[device_buffer] up_db
+    with nogil:
+        c_result = move(cpp_bitmask_or(c_view))
+        up_db = make_unique[device_buffer](move(c_result.first))
+    dbuf = DeviceBuffer.c_from_unique_ptr(move(up_db))
+    buf = as_device_buffer_like(dbuf)
+    return buf, c_result.second
diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index c96407a7ff9..0ab64bd985a 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -8,11 +8,13 @@
 from functools import cached_property
 from typing import Any, Iterable, List, Tuple, Union
 
+import cupy as cp
 import numpy as np
 import pandas as pd
 
 import cudf
 from cudf._lib import groupby as libgroupby
+from cudf._lib.null_mask import bitmask_or
 from cudf._lib.reshape import interleave_columns
 from cudf._typing import AggType, DataFrameOrSeries, MultiColumnAggType
 from cudf.api.types import is_list_like
@@ -544,6 +546,88 @@ def nth(self, n):
 
         return result[sizes > n]
 
+    def ngroup(self, ascending=True):
+        """
+        Number each group from 0 to the number of groups - 1.
+
+        This is the enumerative complement of cumcount. Note that the
+        numbers given to the groups match the order in which the groups
+        would be seen when iterating over the groupby object, not the
+        order they are first observed.
+
+        Parameters
+        ----------
+        ascending : bool, default True
+            If False, number in reverse, from number of group - 1 to 0.
+
+        Returns
+        -------
+        Series
+            Unique numbers for each group.
+
+        See Also
+        --------
+        .cumcount : Number the rows in each group.
+
+        Examples
+        --------
+        >>> df = cudf.DataFrame({"A": list("aaabba")})
+        >>> df
+           A
+        0  a
+        1  a
+        2  a
+        3  b
+        4  b
+        5  a
+        >>> df.groupby('A').ngroup()
+        0    0
+        1    0
+        2    0
+        3    1
+        4    1
+        5    0
+        dtype: int64
+        >>> df.groupby('A').ngroup(ascending=False)
+        0    1
+        1    1
+        2    1
+        3    0
+        4    0
+        5    1
+        dtype: int64
+        >>> df.groupby(["A", [1,1,2,3,2,1]]).ngroup()
+        0    0
+        1    0
+        2    1
+        3    3
+        4    2
+        5    0
+        dtype: int64
+        """
+        num_groups = len(index := self.grouping.keys.unique())
+        _, has_null_group = bitmask_or([*index._columns])
+
+        if ascending:
+            if has_null_group:
+                group_ids = cudf.Series._from_data(
+                    {None: cp.arange(-1, num_groups - 1)}
+                )
+            else:
+                group_ids = cudf.Series._from_data(
+                    {None: cp.arange(num_groups)}
+                )
+        else:
+            group_ids = cudf.Series._from_data(
+                {None: cp.arange(num_groups - 1, -1, -1)}
+            )
+
+        if has_null_group:
+            group_ids.iloc[0] = cudf.NA
+
+        group_ids._index = index
+        return self._broadcast(group_ids)
+
     def serialize(self):
         header = {}
         frames = []
@@ -925,6 +1009,29 @@ def rolling_avg(val, avg):
         kwargs.update({"chunks": offsets})
         return grouped_values.apply_chunks(function, **kwargs)
 
+    def _broadcast(self, values):
+        """
+        Broadcast the results of an aggregation to the group
+
+        Parameters
+        ----------
+        values: Series
+            A Series representing the results of an aggregation.  The
+            index of the Series must be the (unique) values
+            representing the group keys.
+
+        Returns
+        -------
+        A Series of the same size and with the same index as
+        ``self.obj``.
+        """
+        if not values.index.equals(self.grouping.keys):
+            values = values._align_to_index(
+                self.grouping.keys, how="right", allow_non_unique=True
+            )
+            values.index = self.obj.index
+        return values
+
     def transform(self, function):
         """Apply an aggregation, then broadcast the result to the group size.
 
@@ -966,12 +1073,7 @@ def transform(self, function):
                 "Currently, `transform()` supports only aggregations."
             ) from e
 
-        if not result.index.equals(self.grouping.keys):
-            result = result._align_to_index(
-                self.grouping.keys, how="right", allow_non_unique=True
-            )
-            result.index = self.obj.index
-        return result
+        return self._broadcast(result)
 
     def rolling(self, *args, **kwargs):
         """
diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py
index c4c8e81dda2..b00e31115c9 100644
--- a/python/cudf/cudf/tests/test_groupby.py
+++ b/python/cudf/cudf/tests/test_groupby.py
@@ -2718,3 +2718,36 @@ def test_groupby_group_keys(group_keys, by):
     actual = g_group[["B", "C"]].apply(lambda x: x / x.sum())
     expected = p_group[["B", "C"]].apply(lambda x: x / x.sum())
     assert_eq(actual, expected)
+
+
+@pytest.fixture
+def df_ngroup():
+    df = cudf.DataFrame(
+        {
+            "a": [2, 2, 1, 1, 2, 3],
+            "b": [1, 2, 1, 2, 1, 2],
+            "c": ["a", "a", "b", "c", "d", "c"],
+        },
+        index=[1, 3, 5, 7, 4, 2],
+    )
+    df.index.name = "foo"
+    return df
+
+
+@pytest.mark.parametrize(
+    "by",
+    [
+        lambda: "a",
+        lambda: "b",
+        lambda: ["a", "b"],
+        lambda: "c",
+        lambda: pd.Series([1, 2, 1, 2, 1, 2]),
+        lambda: pd.Series(["x", "y", "y", "x", "z", "x"]),
+    ],
+)
+@pytest.mark.parametrize("ascending", [True, False])
+def test_groupby_ngroup(by, ascending, df_ngroup):
+    by = by()
+    expected = df_ngroup.to_pandas().groupby(by).ngroup(ascending=ascending)
+    actual = df_ngroup.groupby(by).ngroup(ascending=ascending)
+    assert_eq(expected, actual, check_dtype=False)

From ccbd852421fed8f25029a1d47b0bbf833e840ddf Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Wed, 12 Oct 2022 09:25:06 -0400
Subject: [PATCH 026/202] Change expect_strings_empty into expect_column_empty
 libcudf test utility (#11873)

Moves the `cudf::test::expect_strings_empty` utility out of `cpp/tests/strings` and into a more generic function, `cudf::test::expect_column_empty`.

Reference #11734

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Vukasin Milovanovic (https://github.com/vuule)
  - Tobias Ribizel (https://github.com/upsj)

URL: https://github.com/rapidsai/cudf/pull/11873
---
 cpp/CMakeLists.txt                            |  1 -
 cpp/include/cudf_test/column_utilities.hpp    |  7 ++++
 cpp/tests/copying/detail_gather_tests.cu      | 15 ++++-----
 cpp/tests/copying/gather_list_tests.cpp       | 17 +++++-----
 cpp/tests/copying/gather_str_tests.cpp        | 16 +++++-----
 cpp/tests/copying/gather_tests.cpp            | 15 ++++-----
 cpp/tests/copying/scatter_list_tests.cpp      | 14 ++++----
 cpp/tests/lists/extract_tests.cpp             |  9 +++---
 .../reshape/interleave_columns_tests.cpp      |  7 ++--
 cpp/tests/strings/array_tests.cpp             | 12 +++----
 cpp/tests/strings/booleans_tests.cpp          | 10 +++---
 cpp/tests/strings/case_tests.cpp              | 21 ++++++------
 .../strings/combine/concatenate_tests.cpp     | 15 ++++-----
 .../strings/combine/join_strings_tests.cpp    | 13 ++++----
 cpp/tests/strings/concatenate_tests.cpp       | 13 ++++----
 cpp/tests/strings/datetime_tests.cpp          | 11 +++----
 cpp/tests/strings/durations_tests.cpp         | 13 ++++----
 cpp/tests/strings/extract_tests.cpp           |  2 --
 cpp/tests/strings/factories_test.cu           | 12 +++----
 cpp/tests/strings/fill_tests.cpp              | 11 +++----
 cpp/tests/strings/findall_tests.cpp           |  6 ++--
 cpp/tests/strings/fixed_point_tests.cpp       | 13 ++++----
 cpp/tests/strings/floats_tests.cpp            |  9 +++---
 cpp/tests/strings/format_lists_tests.cpp      |  6 ++--
 cpp/tests/strings/integers_tests.cpp          | 11 +++----
 cpp/tests/strings/ipv4_tests.cpp              |  8 ++---
 cpp/tests/strings/pad_tests.cpp               | 11 +++----
 cpp/tests/strings/replace_regex_tests.cpp     |  6 ++--
 cpp/tests/strings/replace_tests.cpp           | 11 +++----
 cpp/tests/strings/strip_tests.cpp             | 11 +++----
 cpp/tests/strings/substring_tests.cpp         | 17 +++++-----
 cpp/tests/strings/translate_tests.cpp         | 13 ++++----
 cpp/tests/strings/urls_tests.cpp              | 10 +++---
 cpp/tests/strings/utilities.cpp               | 32 -------------------
 cpp/tests/strings/utilities.h                 | 30 -----------------
 cpp/tests/text/ngrams_tests.cpp               | 12 +++----
 cpp/tests/utilities/column_utilities.cu       |  9 ++++++
 37 files changed, 187 insertions(+), 262 deletions(-)
 delete mode 100644 cpp/tests/strings/utilities.cpp
 delete mode 100644 cpp/tests/strings/utilities.h

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 8bde0bcfb9b..bfabbbc625d 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -693,7 +693,6 @@ add_library(
   tests/utilities/base_fixture.cpp
   tests/utilities/column_utilities.cu
   tests/utilities/table_utilities.cu
-  tests/strings/utilities.cpp
 )
 
 set_target_properties(
diff --git a/cpp/include/cudf_test/column_utilities.hpp b/cpp/include/cudf_test/column_utilities.hpp
index d41ea530402..b7d890fb315 100644
--- a/cpp/include/cudf_test/column_utilities.hpp
+++ b/cpp/include/cudf_test/column_utilities.hpp
@@ -107,6 +107,13 @@ bool expect_columns_equivalent(cudf::column_view const& lhs,
                                debug_output_level verbosity = debug_output_level::FIRST_ERROR,
                                size_type fp_ulps            = cudf::test::default_ulp);
 
+/**
+ * @brief Verifies the given column is empty
+ *
+ * @param col The column to check
+ */
+void expect_column_empty(cudf::column_view const& col);
+
 /**
  * @brief Verifies the bitwise equality of two device memory buffers.
  *
diff --git a/cpp/tests/copying/detail_gather_tests.cu b/cpp/tests/copying/detail_gather_tests.cu
index e3cd975ab41..9cd74abce1c 100644
--- a/cpp/tests/copying/detail_gather_tests.cu
+++ b/cpp/tests/copying/detail_gather_tests.cu
@@ -13,7 +13,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include <tests/strings/utilities.h>
+
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_utilities.hpp>
+#include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/cudf_gtest.hpp>
+#include <cudf_test/table_utilities.hpp>
+#include <cudf_test/type_lists.hpp>
 
 #include <cudf/column/column_view.hpp>
 #include <cudf/copying.hpp>
@@ -24,13 +30,6 @@
 #include <cudf/table/table_view.hpp>
 #include <cudf/utilities/default_stream.hpp>
 
-#include <cudf_test/base_fixture.hpp>
-#include <cudf_test/column_utilities.hpp>
-#include <cudf_test/column_wrapper.hpp>
-#include <cudf_test/cudf_gtest.hpp>
-#include <cudf_test/table_utilities.hpp>
-#include <cudf_test/type_lists.hpp>
-
 #include <rmm/device_uvector.hpp>
 
 #include <thrust/execution_policy.h>
diff --git a/cpp/tests/copying/gather_list_tests.cpp b/cpp/tests/copying/gather_list_tests.cpp
index b26ee90c3b9..1caecb558e2 100644
--- a/cpp/tests/copying/gather_list_tests.cpp
+++ b/cpp/tests/copying/gather_list_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -13,7 +13,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include <tests/strings/utilities.h>
+
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_utilities.hpp>
+#include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/cudf_gtest.hpp>
+#include <cudf_test/table_utilities.hpp>
+#include <cudf_test/type_lists.hpp>
 
 #include <cudf/column/column_view.hpp>
 #include <cudf/copying.hpp>
@@ -23,13 +29,6 @@
 #include <cudf/table/table.hpp>
 #include <cudf/table/table_view.hpp>
 
-#include <cudf_test/base_fixture.hpp>
-#include <cudf_test/column_utilities.hpp>
-#include <cudf_test/column_wrapper.hpp>
-#include <cudf_test/cudf_gtest.hpp>
-#include <cudf_test/table_utilities.hpp>
-#include <cudf_test/type_lists.hpp>
-
 template <typename T>
 class GatherTestListTyped : public cudf::test::BaseFixture {
 };
diff --git a/cpp/tests/copying/gather_str_tests.cpp b/cpp/tests/copying/gather_str_tests.cpp
index a9a9a4f9342..4e4e9619fbf 100644
--- a/cpp/tests/copying/gather_str_tests.cpp
+++ b/cpp/tests/copying/gather_str_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -13,17 +13,17 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include <cudf/column/column_view.hpp>
-#include <cudf/copying.hpp>
-#include <cudf/detail/gather.hpp>
-#include <cudf/table/table.hpp>
-#include <cudf/table/table_view.hpp>
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
 #include <cudf_test/cudf_gtest.hpp>
 #include <cudf_test/table_utilities.hpp>
-#include <tests/strings/utilities.h>
+
+#include <cudf/column/column_view.hpp>
+#include <cudf/copying.hpp>
+#include <cudf/detail/gather.hpp>
+#include <cudf/table/table.hpp>
+#include <cudf/table/table_view.hpp>
 
 class GatherTestStr : public cudf::test::BaseFixture {
 };
@@ -135,7 +135,7 @@ TEST_F(GatherTestStr, GatherEmptyMapStringsColumn)
                                       gather_map,
                                       cudf::out_of_bounds_policy::NULLIFY,
                                       cudf::detail::negative_index_policy::NOT_ALLOWED);
-  cudf::test::expect_strings_empty(results->get_column(0).view());
+  cudf::test::expect_column_empty(results->get_column(0).view());
 }
 
 TEST_F(GatherTestStr, GatherZeroSizeStringsColumn)
diff --git a/cpp/tests/copying/gather_tests.cpp b/cpp/tests/copying/gather_tests.cpp
index 141503ed978..9c8d6102000 100644
--- a/cpp/tests/copying/gather_tests.cpp
+++ b/cpp/tests/copying/gather_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -13,13 +13,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include <tests/strings/utilities.h>
-
-#include <cudf/column/column_view.hpp>
-#include <cudf/copying.hpp>
-#include <cudf/detail/iterator.cuh>
-#include <cudf/table/table.hpp>
-#include <cudf/table/table_view.hpp>
 
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
@@ -28,6 +21,12 @@
 #include <cudf_test/table_utilities.hpp>
 #include <cudf_test/type_lists.hpp>
 
+#include <cudf/column/column_view.hpp>
+#include <cudf/copying.hpp>
+#include <cudf/detail/iterator.cuh>
+#include <cudf/table/table.hpp>
+#include <cudf/table/table_view.hpp>
+
 template <typename T>
 class GatherTest : public cudf::test::BaseFixture {
 };
diff --git a/cpp/tests/copying/scatter_list_tests.cpp b/cpp/tests/copying/scatter_list_tests.cpp
index 0c12f10137a..179ab56fc40 100644
--- a/cpp/tests/copying/scatter_list_tests.cpp
+++ b/cpp/tests/copying/scatter_list_tests.cpp
@@ -14,7 +14,12 @@
  * limitations under the License.
  */
 
-#include <tests/strings/utilities.h>
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_utilities.hpp>
+#include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/cudf_gtest.hpp>
+#include <cudf_test/table_utilities.hpp>
+#include <cudf_test/type_lists.hpp>
 
 #include <cudf/column/column_view.hpp>
 #include <cudf/copying.hpp>
@@ -24,13 +29,6 @@
 #include <cudf/table/table_view.hpp>
 #include <cudf/utilities/default_stream.hpp>
 
-#include <cudf_test/base_fixture.hpp>
-#include <cudf_test/column_utilities.hpp>
-#include <cudf_test/column_wrapper.hpp>
-#include <cudf_test/cudf_gtest.hpp>
-#include <cudf_test/table_utilities.hpp>
-#include <cudf_test/type_lists.hpp>
-
 template <typename T>
 class TypedScatterListsTest : public cudf::test::BaseFixture {
 };
diff --git a/cpp/tests/lists/extract_tests.cpp b/cpp/tests/lists/extract_tests.cpp
index 210a5814ede..34c8e044a3f 100644
--- a/cpp/tests/lists/extract_tests.cpp
+++ b/cpp/tests/lists/extract_tests.cpp
@@ -14,16 +14,15 @@
  * limitations under the License.
  */
 
-#include <cudf/column/column_factories.hpp>
-#include <cudf/detail/null_mask.hpp>
-#include <cudf/lists/extract.hpp>
-
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
 #include <cudf_test/iterator_utilities.hpp>
 #include <cudf_test/type_lists.hpp>
-#include <tests/strings/utilities.h>
+
+#include <cudf/column/column_factories.hpp>
+#include <cudf/detail/null_mask.hpp>
+#include <cudf/lists/extract.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
 
diff --git a/cpp/tests/reshape/interleave_columns_tests.cpp b/cpp/tests/reshape/interleave_columns_tests.cpp
index c682e4ab29f..63e465f7658 100644
--- a/cpp/tests/reshape/interleave_columns_tests.cpp
+++ b/cpp/tests/reshape/interleave_columns_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,13 +14,12 @@
  * limitations under the License.
  */
 
-#include <cudf/column/column_factories.hpp>
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_wrapper.hpp>
 #include <cudf_test/iterator_utilities.hpp>
 #include <cudf_test/type_lists.hpp>
-#include <tests/strings/utilities.h>
 
+#include <cudf/column/column_factories.hpp>
 #include <cudf/reshape.hpp>
 
 using namespace cudf::test::iterators;
@@ -195,7 +194,7 @@ TEST_F(InterleaveStringsColumnsTest, ZeroSizedColumns)
   cudf::column_view col0(cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0);
 
   auto results = cudf::interleave_columns(cudf::table_view{{col0}});
-  cudf::test::expect_strings_empty(results->view());
+  cudf::test::expect_column_empty(results->view());
 }
 
 TEST_F(InterleaveStringsColumnsTest, SingleColumn)
diff --git a/cpp/tests/strings/array_tests.cpp b/cpp/tests/strings/array_tests.cpp
index 10cc4562be7..488184f4099 100644
--- a/cpp/tests/strings/array_tests.cpp
+++ b/cpp/tests/strings/array_tests.cpp
@@ -14,8 +14,6 @@
  * limitations under the License.
  */
 
-#include <tests/strings/utilities.h>
-
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
@@ -53,7 +51,7 @@ TEST_F(StringsColumnTest, SortZeroSizeStringsColumn)
   cudf::column_view zero_size_strings_column(
     cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0);
   auto results = cudf::sort(cudf::table_view({zero_size_strings_column}));
-  cudf::test::expect_strings_empty(results->view().column(0));
+  cudf::test::expect_column_empty(results->view().column(0));
 }
 
 class SliceParmsTest : public StringsColumnTest,
@@ -123,7 +121,7 @@ TEST_F(StringsColumnTest, SliceZeroSizeStringsColumn)
     cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0);
   auto strings_view = cudf::strings_column_view(zero_size_strings_column);
   auto results      = cudf::strings::detail::copy_slice(strings_view, 1, 2);
-  cudf::test::expect_strings_empty(results->view());
+  cudf::test::expect_column_empty(results->view());
 }
 
 TEST_F(StringsColumnTest, Gather)
@@ -151,7 +149,7 @@ TEST_F(StringsColumnTest, GatherZeroSizeStringsColumn)
     cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0);
   cudf::column_view map_view(cudf::data_type{cudf::type_id::INT32}, 0, nullptr, nullptr, 0);
   auto results = cudf::gather(cudf::table_view{{zero_size_strings_column}}, map_view)->release();
-  cudf::test::expect_strings_empty(results.front()->view());
+  cudf::test::expect_column_empty(results.front()->view());
 }
 
 TEST_F(StringsColumnTest, GatherTooBig)
@@ -204,12 +202,12 @@ TEST_F(StringsColumnTest, ScatterZeroSizeStringsColumn)
   cudf::column_view scatter_map(cudf::data_type{cudf::type_id::INT8}, 0, nullptr, nullptr, 0);
 
   auto results = cudf::scatter(cudf::table_view({source}), scatter_map, cudf::table_view({target}));
-  cudf::test::expect_strings_empty(results->view().column(0));
+  cudf::test::expect_column_empty(results->view().column(0));
 
   cudf::string_scalar scalar("");
   auto scalar_source = std::vector<std::reference_wrapper<const cudf::scalar>>({scalar});
   results            = cudf::scatter(scalar_source, scatter_map, cudf::table_view({target}));
-  cudf::test::expect_strings_empty(results->view().column(0));
+  cudf::test::expect_column_empty(results->view().column(0));
 }
 
 TEST_F(StringsColumnTest, OffsetsBeginEnd)
diff --git a/cpp/tests/strings/booleans_tests.cpp b/cpp/tests/strings/booleans_tests.cpp
index cc637bf55a0..b8e47a89274 100644
--- a/cpp/tests/strings/booleans_tests.cpp
+++ b/cpp/tests/strings/booleans_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,12 +14,12 @@
  * limitations under the License.
  */
 
-#include <cudf/strings/convert/convert_booleans.hpp>
-#include <cudf/strings/strings_column_view.hpp>
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
-#include <tests/strings/utilities.h>
+
+#include <cudf/strings/convert/convert_booleans.hpp>
+#include <cudf/strings/strings_column_view.hpp>
 
 #include <thrust/iterator/transform_iterator.h>
 
@@ -69,7 +69,7 @@ TEST_F(StringsConvertTest, ZeroSizeStringsColumnBoolean)
 {
   cudf::column_view zero_size_column(cudf::data_type{cudf::type_id::BOOL8}, 0, nullptr, nullptr, 0);
   auto results = cudf::strings::from_booleans(zero_size_column);
-  cudf::test::expect_strings_empty(results->view());
+  cudf::test::expect_column_empty(results->view());
 }
 
 TEST_F(StringsConvertTest, ZeroSizeBooleansColumn)
diff --git a/cpp/tests/strings/case_tests.cpp b/cpp/tests/strings/case_tests.cpp
index c399c640bb6..26b44b577eb 100644
--- a/cpp/tests/strings/case_tests.cpp
+++ b/cpp/tests/strings/case_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,16 +14,15 @@
  * limitations under the License.
  */
 
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_utilities.hpp>
+#include <cudf_test/column_wrapper.hpp>
+
 #include <cudf/column/column.hpp>
 #include <cudf/strings/capitalize.hpp>
 #include <cudf/strings/case.hpp>
 #include <cudf/strings/strings_column_view.hpp>
 
-#include <cudf_test/base_fixture.hpp>
-#include <cudf_test/column_utilities.hpp>
-#include <cudf_test/column_wrapper.hpp>
-#include <tests/strings/utilities.h>
-
 #include <thrust/iterator/transform_iterator.h>
 
 #include <vector>
@@ -211,19 +210,19 @@ TEST_F(StringsCaseTest, EmptyStringsColumn)
   auto strings_view = cudf::strings_column_view(zero_size_strings_column);
 
   auto results = cudf::strings::to_lower(strings_view);
-  cudf::test::expect_strings_empty(results->view());
+  cudf::test::expect_column_empty(results->view());
 
   results = cudf::strings::to_upper(strings_view);
-  cudf::test::expect_strings_empty(results->view());
+  cudf::test::expect_column_empty(results->view());
 
   results = cudf::strings::swapcase(strings_view);
-  cudf::test::expect_strings_empty(results->view());
+  cudf::test::expect_column_empty(results->view());
 
   results = cudf::strings::capitalize(strings_view);
-  cudf::test::expect_strings_empty(results->view());
+  cudf::test::expect_column_empty(results->view());
 
   results = cudf::strings::title(strings_view);
-  cudf::test::expect_strings_empty(results->view());
+  cudf::test::expect_column_empty(results->view());
 }
 
 TEST_F(StringsCaseTest, ErrorTest)
diff --git a/cpp/tests/strings/combine/concatenate_tests.cpp b/cpp/tests/strings/combine/concatenate_tests.cpp
index 569767531bc..0b744cd6bb4 100644
--- a/cpp/tests/strings/combine/concatenate_tests.cpp
+++ b/cpp/tests/strings/combine/concatenate_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,6 +14,10 @@
  * limitations under the License.
  */
 
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_utilities.hpp>
+#include <cudf_test/column_wrapper.hpp>
+
 #include <cudf/column/column_factories.hpp>
 #include <cudf/scalar/scalar.hpp>
 #include <cudf/strings/combine.hpp>
@@ -21,11 +25,6 @@
 #include <cudf/table/table.hpp>
 #include <cudf/types.hpp>
 
-#include <cudf_test/base_fixture.hpp>
-#include <cudf_test/column_utilities.hpp>
-#include <cudf_test/column_wrapper.hpp>
-#include <tests/strings/utilities.h>
-
 #include <thrust/iterator/transform_iterator.h>
 
 constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_level::ALL_ERRORS};
@@ -158,7 +157,7 @@ TEST_F(StringsCombineTest, ConcatZeroSizeStringsColumns)
   strings_columns.push_back(zero_size_strings_column);
   cudf::table_view table(strings_columns);
   auto results = cudf::strings::concatenate(table);
-  cudf::test::expect_strings_empty(results->view());
+  cudf::test::expect_column_empty(results->view());
 }
 
 TEST_F(StringsCombineTest, SingleColumnErrorCheck)
@@ -207,7 +206,7 @@ TEST_F(StringsConcatenateWithColSeparatorTest, ZeroSizedColumns)
 
   auto results =
     cudf::strings::concatenate(cudf::table_view{{col0}}, cudf::strings_column_view(col0));
-  cudf::test::expect_strings_empty(results->view());
+  cudf::test::expect_column_empty(results->view());
 }
 
 TEST_F(StringsConcatenateWithColSeparatorTest, SingleColumnEmptyAndNullStringsNoReplacements)
diff --git a/cpp/tests/strings/combine/join_strings_tests.cpp b/cpp/tests/strings/combine/join_strings_tests.cpp
index e018540e84c..e0187ce2e26 100644
--- a/cpp/tests/strings/combine/join_strings_tests.cpp
+++ b/cpp/tests/strings/combine/join_strings_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,17 +14,16 @@
  * limitations under the License.
  */
 
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_utilities.hpp>
+#include <cudf_test/column_wrapper.hpp>
+
 #include <cudf/column/column_factories.hpp>
 #include <cudf/scalar/scalar.hpp>
 #include <cudf/strings/combine.hpp>
 #include <cudf/strings/strings_column_view.hpp>
 #include <cudf/types.hpp>
 
-#include <cudf_test/base_fixture.hpp>
-#include <cudf_test/column_utilities.hpp>
-#include <cudf_test/column_wrapper.hpp>
-#include <tests/strings/utilities.h>
-
 #include <thrust/iterator/transform_iterator.h>
 
 struct JoinStringsTest : public cudf::test::BaseFixture {
@@ -66,7 +65,7 @@ TEST_F(JoinStringsTest, JoinZeroSizeStringsColumn)
     cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0);
   auto strings_view = cudf::strings_column_view(zero_size_strings_column);
   auto results      = cudf::strings::join_strings(strings_view);
-  cudf::test::expect_strings_empty(results->view());
+  cudf::test::expect_column_empty(results->view());
 }
 
 TEST_F(JoinStringsTest, JoinAllNullStringsColumn)
diff --git a/cpp/tests/strings/concatenate_tests.cpp b/cpp/tests/strings/concatenate_tests.cpp
index 0318fc3edb9..1462d4dc73a 100644
--- a/cpp/tests/strings/concatenate_tests.cpp
+++ b/cpp/tests/strings/concatenate_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,14 +14,13 @@
  * limitations under the License.
  */
 
-#include <cudf/column/column_factories.hpp>
-#include <cudf/strings/detail/concatenate.hpp>
-#include <cudf/strings/strings_column_view.hpp>
-
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
-#include <tests/strings/utilities.h>
+
+#include <cudf/column/column_factories.hpp>
+#include <cudf/strings/detail/concatenate.hpp>
+#include <cudf/strings/strings_column_view.hpp>
 
 #include <vector>
 
@@ -76,7 +75,7 @@ TEST_F(StringsConcatenateTest, ZeroSizeStringsColumns)
   strings_columns.push_back(zero_size_strings_column);
   strings_columns.push_back(zero_size_strings_column);
   auto results = cudf::strings::detail::concatenate(strings_columns);
-  cudf::test::expect_strings_empty(results->view());
+  cudf::test::expect_column_empty(results->view());
 }
 
 TEST_F(StringsConcatenateTest, ZeroSizeStringsPlusNormal)
diff --git a/cpp/tests/strings/datetime_tests.cpp b/cpp/tests/strings/datetime_tests.cpp
index d8203917d4c..26beaf9756a 100644
--- a/cpp/tests/strings/datetime_tests.cpp
+++ b/cpp/tests/strings/datetime_tests.cpp
@@ -14,6 +14,10 @@
  * limitations under the License.
  */
 
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_utilities.hpp>
+#include <cudf_test/column_wrapper.hpp>
+
 #include <cudf/strings/convert/convert_datetime.hpp>
 #include <cudf/strings/convert/convert_durations.hpp>
 #include <cudf/strings/strings_column_view.hpp>
@@ -21,11 +25,6 @@
 #include <cudf/wrappers/durations.hpp>
 #include <cudf/wrappers/timestamps.hpp>
 
-#include <cudf_test/base_fixture.hpp>
-#include <cudf_test/column_utilities.hpp>
-#include <cudf_test/column_wrapper.hpp>
-#include <tests/strings/utilities.h>
-
 #include <thrust/iterator/transform_iterator.h>
 
 #include <vector>
@@ -573,7 +572,7 @@ TEST_F(StringsDatetimeTest, ZeroSizeStringsColumn)
   cudf::column_view zero_size_column(
     cudf::data_type{cudf::type_id::TIMESTAMP_SECONDS}, 0, nullptr, nullptr, 0);
   auto results = cudf::strings::from_timestamps(zero_size_column);
-  cudf::test::expect_strings_empty(results->view());
+  cudf::test::expect_column_empty(results->view());
 
   cudf::column_view zero_size_strings_column(
     cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0);
diff --git a/cpp/tests/strings/durations_tests.cpp b/cpp/tests/strings/durations_tests.cpp
index 523c64159f4..ac971aa300d 100644
--- a/cpp/tests/strings/durations_tests.cpp
+++ b/cpp/tests/strings/durations_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,14 +14,13 @@
  * limitations under the License.
  */
 
-#include <cudf/strings/convert/convert_durations.hpp>
-#include <cudf/strings/strings_column_view.hpp>
-#include <cudf/wrappers/durations.hpp>
-
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
-#include <tests/strings/utilities.h>
+
+#include <cudf/strings/convert/convert_durations.hpp>
+#include <cudf/strings/strings_column_view.hpp>
+#include <cudf/wrappers/durations.hpp>
 
 #include <thrust/iterator/transform_iterator.h>
 #include <vector>
@@ -732,7 +731,7 @@ TEST_F(StringsDurationsTest, ZeroSizeStringsColumn)
   cudf::column_view zero_size_column(
     cudf::data_type{cudf::type_id::DURATION_SECONDS}, 0, nullptr, nullptr, 0);
   auto results = cudf::strings::from_durations(zero_size_column);
-  cudf::test::expect_strings_empty(results->view());
+  cudf::test::expect_column_empty(results->view());
 
   cudf::column_view zero_size_strings_column(
     cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0);
diff --git a/cpp/tests/strings/extract_tests.cpp b/cpp/tests/strings/extract_tests.cpp
index 49a0c51e14f..e396ca42d6c 100644
--- a/cpp/tests/strings/extract_tests.cpp
+++ b/cpp/tests/strings/extract_tests.cpp
@@ -14,8 +14,6 @@
  * limitations under the License.
  */
 
-#include <tests/strings/utilities.h>
-
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
diff --git a/cpp/tests/strings/factories_test.cu b/cpp/tests/strings/factories_test.cu
index 36fdd423168..a381c1cff89 100644
--- a/cpp/tests/strings/factories_test.cu
+++ b/cpp/tests/strings/factories_test.cu
@@ -14,6 +14,10 @@
  * limitations under the License.
  */
 
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_utilities.hpp>
+#include <cudf_test/column_wrapper.hpp>
+
 #include <cudf/column/column_factories.hpp>
 #include <cudf/copying.hpp>
 #include <cudf/detail/utilities/vector_factories.hpp>
@@ -24,10 +28,6 @@
 #include <cudf/types.hpp>
 #include <cudf/utilities/default_stream.hpp>
 #include <cudf/utilities/span.hpp>
-#include <cudf_test/base_fixture.hpp>
-#include <cudf_test/column_utilities.hpp>
-#include <cudf_test/column_wrapper.hpp>
-#include <tests/strings/utilities.h>
 
 #include <rmm/device_uvector.hpp>
 #include <rmm/exec_policy.hpp>
@@ -188,12 +188,12 @@ TEST_F(StringsFactoriesTest, EmptyStringsColumn)
   rmm::device_uvector<cudf::bitmask_type> d_nulls{0, cudf::default_stream_value};
 
   auto results = cudf::make_strings_column(d_chars, d_offsets, d_nulls, 0);
-  cudf::test::expect_strings_empty(results->view());
+  cudf::test::expect_column_empty(results->view());
 
   rmm::device_uvector<thrust::pair<const char*, cudf::size_type>> d_strings{
     0, cudf::default_stream_value};
   results = cudf::make_strings_column(d_strings);
-  cudf::test::expect_strings_empty(results->view());
+  cudf::test::expect_column_empty(results->view());
 }
 
 namespace {
diff --git a/cpp/tests/strings/fill_tests.cpp b/cpp/tests/strings/fill_tests.cpp
index 44bbb3c9c29..46f6b633dc5 100644
--- a/cpp/tests/strings/fill_tests.cpp
+++ b/cpp/tests/strings/fill_tests.cpp
@@ -14,16 +14,15 @@
  * limitations under the License.
  */
 
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_utilities.hpp>
+#include <cudf_test/column_wrapper.hpp>
+
 #include <cudf/column/column_factories.hpp>
 #include <cudf/scalar/scalar.hpp>
 #include <cudf/strings/detail/fill.hpp>
 #include <cudf/strings/strings_column_view.hpp>
 
-#include <cudf_test/base_fixture.hpp>
-#include <cudf_test/column_utilities.hpp>
-#include <cudf_test/column_wrapper.hpp>
-#include <tests/strings/utilities.h>
-
 #include <thrust/iterator/transform_iterator.h>
 
 #include <vector>
@@ -83,7 +82,7 @@ TEST_F(StringsFillTest, ZeroSizeStringsColumns)
     cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0);
   auto results = cudf::strings::detail::fill(
     cudf::strings_column_view(zero_size_strings_column), 0, 1, cudf::string_scalar(""));
-  cudf::test::expect_strings_empty(results->view());
+  cudf::test::expect_column_empty(results->view());
 }
 
 TEST_F(StringsFillTest, FillRangeError)
diff --git a/cpp/tests/strings/findall_tests.cpp b/cpp/tests/strings/findall_tests.cpp
index b55d0977215..1dd088cb70f 100644
--- a/cpp/tests/strings/findall_tests.cpp
+++ b/cpp/tests/strings/findall_tests.cpp
@@ -14,13 +14,13 @@
  * limitations under the License.
  */
 
-#include <cudf/strings/findall.hpp>
-#include <cudf/strings/strings_column_view.hpp>
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
 #include <cudf_test/table_utilities.hpp>
-#include <tests/strings/utilities.h>
+
+#include <cudf/strings/findall.hpp>
+#include <cudf/strings/strings_column_view.hpp>
 
 #include <thrust/iterator/transform_iterator.h>
 
diff --git a/cpp/tests/strings/fixed_point_tests.cpp b/cpp/tests/strings/fixed_point_tests.cpp
index 81122b1c5d8..15c12421dd9 100644
--- a/cpp/tests/strings/fixed_point_tests.cpp
+++ b/cpp/tests/strings/fixed_point_tests.cpp
@@ -14,17 +14,16 @@
  * limitations under the License.
  */
 
-#include <cudf/fixed_point/fixed_point.hpp>
-#include <cudf/strings/convert/convert_fixed_point.hpp>
-#include <cudf/strings/strings_column_view.hpp>
-
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
 #include <cudf_test/type_lists.hpp>
-#include <limits>
 
-#include <tests/strings/utilities.h>
+#include <cudf/fixed_point/fixed_point.hpp>
+#include <cudf/strings/convert/convert_fixed_point.hpp>
+#include <cudf/strings/strings_column_view.hpp>
+
+#include <limits>
 
 struct StringsConvertTest : public cudf::test::BaseFixture {
 };
@@ -224,7 +223,7 @@ TEST_F(StringsConvertTest, ZeroSizeStringsColumnFixedPoint)
   auto zero_size_column = cudf::make_empty_column(cudf::data_type{cudf::type_id::DECIMAL32});
 
   auto results = cudf::strings::from_fixed_point(zero_size_column->view());
-  cudf::test::expect_strings_empty(results->view());
+  cudf::test::expect_column_empty(results->view());
 }
 
 TEST_F(StringsConvertTest, ZeroSizeFixedPointColumn)
diff --git a/cpp/tests/strings/floats_tests.cpp b/cpp/tests/strings/floats_tests.cpp
index 360ea8be178..1a3c5ada04f 100644
--- a/cpp/tests/strings/floats_tests.cpp
+++ b/cpp/tests/strings/floats_tests.cpp
@@ -14,13 +14,12 @@
  * limitations under the License.
  */
 
-#include <cudf/strings/convert/convert_floats.hpp>
-#include <cudf/strings/strings_column_view.hpp>
-
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
-#include <tests/strings/utilities.h>
+
+#include <cudf/strings/convert/convert_floats.hpp>
+#include <cudf/strings/strings_column_view.hpp>
 
 #include <thrust/iterator/transform_iterator.h>
 
@@ -188,7 +187,7 @@ TEST_F(StringsConvertTest, ZeroSizeStringsColumnFloat)
   cudf::column_view zero_size_column(
     cudf::data_type{cudf::type_id::FLOAT32}, 0, nullptr, nullptr, 0);
   auto results = cudf::strings::from_floats(zero_size_column);
-  cudf::test::expect_strings_empty(results->view());
+  cudf::test::expect_column_empty(results->view());
 }
 
 TEST_F(StringsConvertTest, ZeroSizeFloatsColumn)
diff --git a/cpp/tests/strings/format_lists_tests.cpp b/cpp/tests/strings/format_lists_tests.cpp
index 63fcdf6f00e..f1ab90ee9c5 100644
--- a/cpp/tests/strings/format_lists_tests.cpp
+++ b/cpp/tests/strings/format_lists_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,8 +14,6 @@
  * limitations under the License.
  */
 
-#include <tests/strings/utilities.h>
-
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
@@ -36,7 +34,7 @@ TEST_F(StringsFormatListsTest, EmptyList)
   auto const view  = cudf::lists_column_view(input);
 
   auto results = cudf::strings::format_list_column(view);
-  cudf::test::expect_strings_empty(results->view());
+  cudf::test::expect_column_empty(results->view());
 }
 
 TEST_F(StringsFormatListsTest, EmptyNestedList)
diff --git a/cpp/tests/strings/integers_tests.cpp b/cpp/tests/strings/integers_tests.cpp
index 5802a1ddc0a..e938eec8b3e 100644
--- a/cpp/tests/strings/integers_tests.cpp
+++ b/cpp/tests/strings/integers_tests.cpp
@@ -14,15 +14,14 @@
  * limitations under the License.
  */
 
-#include <cudf/strings/convert/convert_integers.hpp>
-#include <cudf/strings/strings_column_view.hpp>
-
-#include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
 #include <cudf_test/type_lists.hpp>
-#include <tests/strings/utilities.h>
+
+#include <cudf/detail/utilities/vector_factories.hpp>
+#include <cudf/strings/convert/convert_integers.hpp>
+#include <cudf/strings/strings_column_view.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_uvector.hpp>
@@ -265,7 +264,7 @@ TEST_F(StringsConvertTest, ZeroSizeStringsColumn)
 {
   cudf::column_view zero_size_column(cudf::data_type{cudf::type_id::INT32}, 0, nullptr, nullptr, 0);
   auto results = cudf::strings::from_integers(zero_size_column);
-  cudf::test::expect_strings_empty(results->view());
+  cudf::test::expect_column_empty(results->view());
 }
 
 TEST_F(StringsConvertTest, ZeroSizeIntegersColumn)
diff --git a/cpp/tests/strings/ipv4_tests.cpp b/cpp/tests/strings/ipv4_tests.cpp
index 1bc726edea7..0a404534916 100644
--- a/cpp/tests/strings/ipv4_tests.cpp
+++ b/cpp/tests/strings/ipv4_tests.cpp
@@ -14,12 +14,12 @@
  * limitations under the License.
  */
 
-#include <cudf/strings/convert/convert_ipv4.hpp>
-#include <cudf/strings/strings_column_view.hpp>
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
-#include <tests/strings/utilities.h>
+
+#include <cudf/strings/convert/convert_ipv4.hpp>
+#include <cudf/strings/strings_column_view.hpp>
 
 #include <thrust/iterator/transform_iterator.h>
 
@@ -75,7 +75,7 @@ TEST_F(StringsConvertTest, ZeroSizeStringsColumnIPV4)
 {
   cudf::column_view zero_size_column(cudf::data_type{cudf::type_id::INT64}, 0, nullptr, nullptr, 0);
   auto results = cudf::strings::integers_to_ipv4(zero_size_column);
-  cudf::test::expect_strings_empty(results->view());
+  cudf::test::expect_column_empty(results->view());
   results = cudf::strings::ipv4_to_integers(results->view());
   EXPECT_EQ(0, results->size());
 }
diff --git a/cpp/tests/strings/pad_tests.cpp b/cpp/tests/strings/pad_tests.cpp
index 1ccef58a8f6..c416c2b3ce1 100644
--- a/cpp/tests/strings/pad_tests.cpp
+++ b/cpp/tests/strings/pad_tests.cpp
@@ -14,6 +14,10 @@
  * limitations under the License.
  */
 
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_utilities.hpp>
+#include <cudf_test/column_wrapper.hpp>
+
 #include <cudf/column/column.hpp>
 #include <cudf/column/column_factories.hpp>
 #include <cudf/strings/padding.hpp>
@@ -21,11 +25,6 @@
 #include <cudf/strings/wrap.hpp>
 #include <cudf/utilities/error.hpp>
 
-#include <cudf_test/base_fixture.hpp>
-#include <cudf_test/column_utilities.hpp>
-#include <cudf_test/column_wrapper.hpp>
-#include <tests/strings/utilities.h>
-
 #include <thrust/iterator/transform_iterator.h>
 
 #include <vector>
@@ -103,7 +102,7 @@ TEST_F(StringsPadTest, ZeroSizeStringsColumn)
     cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0);
   auto strings_view = cudf::strings_column_view(zero_size_strings_column);
   auto results      = cudf::strings::pad(strings_view, 5);
-  cudf::test::expect_strings_empty(results->view());
+  cudf::test::expect_column_empty(results->view());
 }
 
 class PadParameters : public StringsPadTest, public testing::WithParamInterface<cudf::size_type> {
diff --git a/cpp/tests/strings/replace_regex_tests.cpp b/cpp/tests/strings/replace_regex_tests.cpp
index 79d968b14ad..6280463d112 100644
--- a/cpp/tests/strings/replace_regex_tests.cpp
+++ b/cpp/tests/strings/replace_regex_tests.cpp
@@ -14,12 +14,12 @@
  * limitations under the License.
  */
 
-#include <cudf/strings/replace_re.hpp>
-#include <cudf/strings/strings_column_view.hpp>
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
-#include <tests/strings/utilities.h>
+
+#include <cudf/strings/replace_re.hpp>
+#include <cudf/strings/strings_column_view.hpp>
 
 #include <thrust/iterator/transform_iterator.h>
 
diff --git a/cpp/tests/strings/replace_tests.cpp b/cpp/tests/strings/replace_tests.cpp
index 75c6cfa70e4..cd39c1e088a 100644
--- a/cpp/tests/strings/replace_tests.cpp
+++ b/cpp/tests/strings/replace_tests.cpp
@@ -14,16 +14,15 @@
  * limitations under the License.
  */
 
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_utilities.hpp>
+#include <cudf_test/column_wrapper.hpp>
+
 #include <cudf/column/column.hpp>
 #include <cudf/strings/detail/replace.hpp>
 #include <cudf/strings/replace.hpp>
 #include <cudf/strings/strings_column_view.hpp>
 
-#include "./utilities.h"
-#include <cudf_test/base_fixture.hpp>
-#include <cudf_test/column_utilities.hpp>
-#include <cudf_test/column_wrapper.hpp>
-
 #include <thrust/iterator/constant_iterator.h>
 #include <thrust/iterator/transform_iterator.h>
 
@@ -348,5 +347,5 @@ TEST_F(StringsReplaceTest, EmptyStringsColumn)
   auto results      = cudf::strings::replace(
     strings_view, cudf::string_scalar("not"), cudf::string_scalar("pertinent"));
   auto view = results->view();
-  cudf::test::expect_strings_empty(results->view());
+  cudf::test::expect_column_empty(results->view());
 }
diff --git a/cpp/tests/strings/strip_tests.cpp b/cpp/tests/strings/strip_tests.cpp
index 6916b990762..f7044b48e40 100644
--- a/cpp/tests/strings/strip_tests.cpp
+++ b/cpp/tests/strings/strip_tests.cpp
@@ -14,15 +14,14 @@
  * limitations under the License.
  */
 
-#include <cudf/column/column.hpp>
-#include <cudf/strings/strings_column_view.hpp>
-#include <cudf/strings/strip.hpp>
-
-#include "./utilities.h"
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
 
+#include <cudf/column/column.hpp>
+#include <cudf/strings/strings_column_view.hpp>
+#include <cudf/strings/strip.hpp>
+
 #include <thrust/iterator/transform_iterator.h>
 
 #include <vector>
@@ -99,7 +98,7 @@ TEST_F(StringsStripTest, EmptyStringsColumn)
   auto strings_view = cudf::strings_column_view(zero_size_strings_column);
   auto results      = cudf::strings::strip(strings_view);
   auto view         = results->view();
-  cudf::test::expect_strings_empty(results->view());
+  cudf::test::expect_column_empty(results->view());
 }
 
 TEST_F(StringsStripTest, InvalidParameter)
diff --git a/cpp/tests/strings/substring_tests.cpp b/cpp/tests/strings/substring_tests.cpp
index 1a90dc5fe38..e8e2d936d12 100644
--- a/cpp/tests/strings/substring_tests.cpp
+++ b/cpp/tests/strings/substring_tests.cpp
@@ -14,16 +14,15 @@
  * limitations under the License.
  */
 
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_utilities.hpp>
+#include <cudf_test/column_wrapper.hpp>
+
 #include <cudf/column/column_view.hpp>
 #include <cudf/scalar/scalar.hpp>
 #include <cudf/strings/strings_column_view.hpp>
 #include <cudf/strings/substring.hpp>
 
-#include <cudf_test/base_fixture.hpp>
-#include <cudf_test/column_utilities.hpp>
-#include <cudf_test/column_wrapper.hpp>
-#include <tests/strings/utilities.h>
-
 #include <thrust/host_vector.h>
 #include <thrust/iterator/transform_iterator.h>
 #include <thrust/sequence.h>
@@ -283,18 +282,18 @@ TEST_F(StringsSubstringsTest, ZeroSizeStringsColumn)
   auto strings_view = cudf::strings_column_view(zero_size_strings_column);
 
   auto results = cudf::strings::slice_strings(strings_view, 1, 2);
-  cudf::test::expect_strings_empty(results->view());
+  cudf::test::expect_column_empty(results->view());
 
   results = cudf::strings::slice_strings(strings_view, cudf::string_scalar("foo"), 1);
-  cudf::test::expect_strings_empty(results->view());
+  cudf::test::expect_column_empty(results->view());
 
   cudf::column_view starts_column(cudf::data_type{cudf::type_id::INT32}, 0, nullptr, nullptr, 0);
   cudf::column_view stops_column(cudf::data_type{cudf::type_id::INT32}, 0, nullptr, nullptr, 0);
   results = cudf::strings::slice_strings(strings_view, starts_column, stops_column);
-  cudf::test::expect_strings_empty(results->view());
+  cudf::test::expect_column_empty(results->view());
 
   results = cudf::strings::slice_strings(strings_view, strings_view, 1);
-  cudf::test::expect_strings_empty(results->view());
+  cudf::test::expect_column_empty(results->view());
 }
 
 TEST_F(StringsSubstringsTest, AllEmpty)
diff --git a/cpp/tests/strings/translate_tests.cpp b/cpp/tests/strings/translate_tests.cpp
index 53c6982b880..1e278caa366 100644
--- a/cpp/tests/strings/translate_tests.cpp
+++ b/cpp/tests/strings/translate_tests.cpp
@@ -14,17 +14,16 @@
  * limitations under the License.
  */
 
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_utilities.hpp>
+#include <cudf_test/column_wrapper.hpp>
+
 #include <cudf/column/column.hpp>
 #include <cudf/column/column_factories.hpp>
 #include <cudf/strings/strings_column_view.hpp>
 #include <cudf/strings/translate.hpp>
 #include <cudf/utilities/error.hpp>
 
-#include <cudf_test/base_fixture.hpp>
-#include <cudf_test/column_utilities.hpp>
-#include <cudf_test/column_wrapper.hpp>
-#include <tests/strings/utilities.h>
-
 #include <thrust/iterator/transform_iterator.h>
 
 #include <vector>
@@ -69,9 +68,9 @@ TEST_F(StringsTranslateTest, ZeroSizeStringsColumn)
   auto strings_view = cudf::strings_column_view(zero_size_strings_column);
   std::vector<std::pair<cudf::char_utf8, cudf::char_utf8>> translate_table;
   auto results = cudf::strings::translate(strings_view, translate_table);
-  cudf::test::expect_strings_empty(results->view());
+  cudf::test::expect_column_empty(results->view());
   results = cudf::strings::filter_characters(strings_view, translate_table);
-  cudf::test::expect_strings_empty(results->view());
+  cudf::test::expect_column_empty(results->view());
 }
 
 TEST_F(StringsTranslateTest, FilterCharacters)
diff --git a/cpp/tests/strings/urls_tests.cpp b/cpp/tests/strings/urls_tests.cpp
index 95a51bbaaeb..9199d78cfb8 100644
--- a/cpp/tests/strings/urls_tests.cpp
+++ b/cpp/tests/strings/urls_tests.cpp
@@ -14,12 +14,12 @@
  * limitations under the License.
  */
 
-#include <cudf/strings/convert/convert_urls.hpp>
-#include <cudf/strings/strings_column_view.hpp>
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
-#include <tests/strings/utilities.h>
+
+#include <cudf/strings/convert/convert_urls.hpp>
+#include <cudf/strings/strings_column_view.hpp>
 
 #include <thrust/iterator/transform_iterator.h>
 
@@ -230,7 +230,7 @@ TEST_F(StringsConvertTest, ZeroSizeUrlStringsColumn)
   cudf::column_view zero_size_column(
     cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0);
   auto results = cudf::strings::url_encode(zero_size_column);
-  cudf::test::expect_strings_empty(results->view());
+  cudf::test::expect_column_empty(results->view());
   results = cudf::strings::url_decode(zero_size_column);
-  cudf::test::expect_strings_empty(results->view());
+  cudf::test::expect_column_empty(results->view());
 }
diff --git a/cpp/tests/strings/utilities.cpp b/cpp/tests/strings/utilities.cpp
deleted file mode 100644
index 1d7ec7cbecd..00000000000
--- a/cpp/tests/strings/utilities.cpp
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (c) 2019, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <tests/strings/utilities.h>
-
-#include <gmock/gmock.h>
-
-namespace cudf {
-namespace test {
-void expect_strings_empty(cudf::column_view strings_column)
-{
-  EXPECT_EQ(type_id::STRING, strings_column.type().id());
-  EXPECT_EQ(0, strings_column.size());
-  EXPECT_EQ(0, strings_column.null_count());
-  EXPECT_EQ(0, strings_column.num_children());
-}
-
-}  // namespace test
-}  // namespace cudf
diff --git a/cpp/tests/strings/utilities.h b/cpp/tests/strings/utilities.h
deleted file mode 100644
index d6f0e9c4f1f..00000000000
--- a/cpp/tests/strings/utilities.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (c) 2019-2020, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#pragma once
-
-#include <cudf/column/column.hpp>
-
-namespace cudf {
-namespace test {
-/**
- * @brief Utility will verify the given strings column is empty.
- *
- * @param strings_column Column of strings to check
- */
-void expect_strings_empty(cudf::column_view strings_column);
-
-}  // namespace test
-}  // namespace cudf
diff --git a/cpp/tests/text/ngrams_tests.cpp b/cpp/tests/text/ngrams_tests.cpp
index 20ffd3baa41..61bd1b3dccd 100644
--- a/cpp/tests/text/ngrams_tests.cpp
+++ b/cpp/tests/text/ngrams_tests.cpp
@@ -14,13 +14,13 @@
  * limitations under the License.
  */
 
-#include <cudf/column/column.hpp>
-#include <cudf/scalar/scalar.hpp>
-#include <cudf/strings/strings_column_view.hpp>
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
-#include <tests/strings/utilities.h>
+
+#include <cudf/column/column.hpp>
+#include <cudf/scalar/scalar.hpp>
+#include <cudf/strings/strings_column_view.hpp>
 
 #include <nvtext/generate_ngrams.hpp>
 
@@ -105,9 +105,9 @@ TEST_F(TextGenerateNgramsTest, Empty)
   cudf::column_view zero_size_strings_column(
     cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0);
   auto results = nvtext::generate_ngrams(cudf::strings_column_view(zero_size_strings_column));
-  cudf::test::expect_strings_empty(results->view());
+  cudf::test::expect_column_empty(results->view());
   results = nvtext::generate_character_ngrams(cudf::strings_column_view(zero_size_strings_column));
-  cudf::test::expect_strings_empty(results->view());
+  cudf::test::expect_column_empty(results->view());
 }
 
 TEST_F(TextGenerateNgramsTest, Errors)
diff --git a/cpp/tests/utilities/column_utilities.cu b/cpp/tests/utilities/column_utilities.cu
index 5106196a58f..d0fc92b0bb5 100644
--- a/cpp/tests/utilities/column_utilities.cu
+++ b/cpp/tests/utilities/column_utilities.cu
@@ -854,6 +854,15 @@ bool expect_columns_equivalent(cudf::column_view const& lhs,
                                fp_ulps);
 }
 
+/**
+ * @copydoc cudf::test::expect_column_empty
+ */
+void expect_column_empty(cudf::column_view const& col)
+{
+  EXPECT_EQ(0, col.size());
+  EXPECT_EQ(0, col.null_count());
+}
+
 /**
  * @copydoc cudf::test::expect_equal_buffers
  */

From 75a6973275f4601411c94dc42d878a5bfc04a104 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Wed, 12 Oct 2022 08:53:43 -0500
Subject: [PATCH 027/202] Relax `codecov` threshold diff (#11899)

This PR relaxes the `codecov` threshold, which will allow CI checks to pass (though it's optional to merge).

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Jake Awe (https://github.com/AyodeAwe)

URL: https://github.com/rapidsai/cudf/pull/11899
---
 codecov.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/codecov.yml b/codecov.yml
index d45c7e2990f..344d4f3f04e 100644
--- a/codecov.yml
+++ b/codecov.yml
@@ -5,7 +5,7 @@ coverage:
     patch:
       default:
         target: auto
-        threshold: 0%
+        threshold: 5%
 
 github_checks:
     annotations: true

From 8b5ab2394748a84c2200da389f7f3ff3de5b5590 Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Wed, 12 Oct 2022 09:58:24 -0400
Subject: [PATCH 028/202] Fix memcheck error in TypeInference.Timestamp gtest
 (#11905)

Fixes an error in the `TypeInference.Timestamp` gtest where the `size` parameter was incorrect.

This error was found by the nightly builds and could be recreated using
```
compute-sanitizer --tool memcheck gtests/TYPE_INFERENCE_TEST --gtest_filter=TypeInference.Timestamp --rmm_mode=cuda
```

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Vukasin Milovanovic (https://github.com/vuule)
  - Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/11905
---
 cpp/tests/io/type_inference_test.cu | 49 +++++++++++++----------------
 1 file changed, 21 insertions(+), 28 deletions(-)

diff --git a/cpp/tests/io/type_inference_test.cu b/cpp/tests/io/type_inference_test.cu
index 04bb7507934..4d01ef95b85 100644
--- a/cpp/tests/io/type_inference_test.cu
+++ b/cpp/tests/io/type_inference_test.cu
@@ -50,9 +50,8 @@ TEST_F(TypeInference, Basic)
   auto d_data           = cudf::make_string_scalar(data);
   auto& d_string_scalar = static_cast<cudf::string_scalar&>(*d_data);
 
-  std::size_t constexpr size = 3;
-  auto const string_offset   = std::vector<int32_t>{1, 4, 7};
-  auto const string_length   = std::vector<std::size_t>{2, 2, 1};
+  auto const string_offset = std::vector<int32_t>{1, 4, 7};
+  auto const string_length = std::vector<std::size_t>{2, 2, 1};
   rmm::device_vector<int32_t> d_string_offset{string_offset};
   rmm::device_vector<std::size_t> d_string_length{string_length};
 
@@ -63,7 +62,7 @@ TEST_F(TypeInference, Basic)
     infer_data_type(options.json_view(),
                     {d_string_scalar.data(), static_cast<std::size_t>(d_string_scalar.size())},
                     d_col_strings,
-                    size,
+                    string_offset.size(),
                     stream);
 
   EXPECT_EQ(res_type, cudf::data_type{cudf::type_id::INT64});
@@ -82,9 +81,8 @@ TEST_F(TypeInference, Null)
   auto d_data           = cudf::make_string_scalar(data);
   auto& d_string_scalar = static_cast<cudf::string_scalar&>(*d_data);
 
-  std::size_t constexpr size = 3;
-  auto const string_offset   = std::vector<int32_t>{1, 1, 4};
-  auto const string_length   = std::vector<std::size_t>{0, 2, 1};
+  auto const string_offset = std::vector<int32_t>{1, 1, 4};
+  auto const string_length = std::vector<std::size_t>{0, 2, 1};
   rmm::device_vector<int32_t> d_string_offset{string_offset};
   rmm::device_vector<std::size_t> d_string_length{string_length};
 
@@ -95,7 +93,7 @@ TEST_F(TypeInference, Null)
     infer_data_type(options.json_view(),
                     {d_string_scalar.data(), static_cast<std::size_t>(d_string_scalar.size())},
                     d_col_strings,
-                    size,
+                    string_offset.size(),
                     stream);
 
   EXPECT_EQ(res_type,
@@ -115,9 +113,8 @@ TEST_F(TypeInference, AllNull)
   auto d_data           = cudf::make_string_scalar(data);
   auto& d_string_scalar = static_cast<cudf::string_scalar&>(*d_data);
 
-  std::size_t constexpr size = 3;
-  auto const string_offset   = std::vector<int32_t>{1, 1, 1};
-  auto const string_length   = std::vector<std::size_t>{0, 0, 4};
+  auto const string_offset = std::vector<int32_t>{1, 1, 1};
+  auto const string_length = std::vector<std::size_t>{0, 0, 4};
   rmm::device_vector<int32_t> d_string_offset{string_offset};
   rmm::device_vector<std::size_t> d_string_length{string_length};
 
@@ -128,7 +125,7 @@ TEST_F(TypeInference, AllNull)
     infer_data_type(options.json_view(),
                     {d_string_scalar.data(), static_cast<std::size_t>(d_string_scalar.size())},
                     d_col_strings,
-                    size,
+                    string_offset.size(),
                     stream);
 
   EXPECT_EQ(res_type, cudf::data_type{cudf::type_id::INT8});  // INT8 if all nulls
@@ -147,9 +144,8 @@ TEST_F(TypeInference, String)
   auto d_data           = cudf::make_string_scalar(data);
   auto& d_string_scalar = static_cast<cudf::string_scalar&>(*d_data);
 
-  std::size_t constexpr size = 3;
-  auto const string_offset   = std::vector<int32_t>{1, 8, 12};
-  auto const string_length   = std::vector<std::size_t>{6, 3, 4};
+  auto const string_offset = std::vector<int32_t>{1, 8, 12};
+  auto const string_length = std::vector<std::size_t>{6, 3, 4};
   rmm::device_vector<int32_t> d_string_offset{string_offset};
   rmm::device_vector<std::size_t> d_string_length{string_length};
 
@@ -160,7 +156,7 @@ TEST_F(TypeInference, String)
     infer_data_type(options.json_view(),
                     {d_string_scalar.data(), static_cast<std::size_t>(d_string_scalar.size())},
                     d_col_strings,
-                    size,
+                    string_offset.size(),
                     stream);
 
   EXPECT_EQ(res_type, cudf::data_type{cudf::type_id::STRING});
@@ -179,9 +175,8 @@ TEST_F(TypeInference, Bool)
   auto d_data           = cudf::make_string_scalar(data);
   auto& d_string_scalar = static_cast<cudf::string_scalar&>(*d_data);
 
-  std::size_t constexpr size = 3;
-  auto const string_offset   = std::vector<int32_t>{1, 6, 12};
-  auto const string_length   = std::vector<std::size_t>{4, 5, 5};
+  auto const string_offset = std::vector<int32_t>{1, 6, 12};
+  auto const string_length = std::vector<std::size_t>{4, 5, 5};
   rmm::device_vector<int32_t> d_string_offset{string_offset};
   rmm::device_vector<std::size_t> d_string_length{string_length};
 
@@ -192,7 +187,7 @@ TEST_F(TypeInference, Bool)
     infer_data_type(options.json_view(),
                     {d_string_scalar.data(), static_cast<std::size_t>(d_string_scalar.size())},
                     d_col_strings,
-                    size,
+                    string_offset.size(),
                     stream);
 
   EXPECT_EQ(res_type, cudf::data_type{cudf::type_id::BOOL8});
@@ -211,9 +206,8 @@ TEST_F(TypeInference, Timestamp)
   auto d_data           = cudf::make_string_scalar(data);
   auto& d_string_scalar = static_cast<cudf::string_scalar&>(*d_data);
 
-  std::size_t constexpr size = 3;
-  auto const string_offset   = std::vector<int32_t>{1, 10};
-  auto const string_length   = std::vector<std::size_t>{8, 9};
+  auto const string_offset = std::vector<int32_t>{1, 10};
+  auto const string_length = std::vector<std::size_t>{8, 9};
   rmm::device_vector<int32_t> d_string_offset{string_offset};
   rmm::device_vector<std::size_t> d_string_length{string_length};
 
@@ -224,7 +218,7 @@ TEST_F(TypeInference, Timestamp)
     infer_data_type(options.json_view(),
                     {d_string_scalar.data(), static_cast<std::size_t>(d_string_scalar.size())},
                     d_col_strings,
-                    size,
+                    string_offset.size(),
                     stream);
 
   // All data time (quoted and unquoted) is inferred as string for now
@@ -244,9 +238,8 @@ TEST_F(TypeInference, InvalidInput)
   auto d_data           = cudf::make_string_scalar(data);
   auto& d_string_scalar = static_cast<cudf::string_scalar&>(*d_data);
 
-  std::size_t constexpr size = 5;
-  auto const string_offset   = std::vector<int32_t>{1, 3, 5, 7, 9};
-  auto const string_length   = std::vector<std::size_t>{1, 1, 1, 1, 1};
+  auto const string_offset = std::vector<int32_t>{1, 3, 5, 7, 9};
+  auto const string_length = std::vector<std::size_t>{1, 1, 1, 1, 1};
   rmm::device_vector<int32_t> d_string_offset{string_offset};
   rmm::device_vector<std::size_t> d_string_length{string_length};
 
@@ -257,7 +250,7 @@ TEST_F(TypeInference, InvalidInput)
     infer_data_type(options.json_view(),
                     {d_string_scalar.data(), static_cast<std::size_t>(d_string_scalar.size())},
                     d_col_strings,
-                    size,
+                    string_offset.size(),
                     stream);
 
   // Invalid input is inferred as string for now

From 3226859c9dd860c5225ffb34cc2de4c0a5e3bf71 Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Wed, 12 Oct 2022 13:11:59 -0400
Subject: [PATCH 029/202] Fix memcheck error in get_dremel_data (#11903)

Fixes the logic that applies offsets to nested column children so that it does not write past the end of the offsets vector.

This error was found by the nightly builds and could be recreated using
```
compute-sanitizer --tool memcheck gtests/PARQUET_TEST --gtest_filter=ParquetReaderTest.NestedByteArray --rmm_mode=cuda
```

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Karthikeyan (https://github.com/karthikeyann)
  - Vukasin Milovanovic (https://github.com/vuule)
  - Tobias Ribizel (https://github.com/upsj)

URL: https://github.com/rapidsai/cudf/pull/11903
---
 cpp/src/lists/dremel.cu | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/cpp/src/lists/dremel.cu b/cpp/src/lists/dremel.cu
index cb9cd4293b5..25094536cce 100644
--- a/cpp/src/lists/dremel.cu
+++ b/cpp/src/lists/dremel.cu
@@ -225,6 +225,7 @@ dremel_data get_dremel_data(column_view h_col,
   cudf::detail::device_single_thread(
     [offset_at_level  = d_column_offsets.data(),
      end_idx_at_level = d_column_ends.data(),
+     level_max        = d_column_offsets.size(),
      col              = *d_col] __device__() {
       auto curr_col           = col;
       size_type off           = curr_col.offset();
@@ -239,9 +240,11 @@ dremel_data get_dremel_data(column_view h_col,
         if (curr_col.type().id() == type_id::LIST) {
           off = curr_col.child(lists_column_view::offsets_column_index).element<size_type>(off);
           end = curr_col.child(lists_column_view::offsets_column_index).element<size_type>(end);
-          offset_at_level[level]  = off;
-          end_idx_at_level[level] = end;
-          ++level;
+          if (level < level_max) {
+            offset_at_level[level]  = off;
+            end_idx_at_level[level] = end;
+            ++level;
+          }
           curr_col = curr_col.child(lists_column_view::child_column_index);
         } else {
           curr_col = curr_col.child(0);

From 0ca68c79662a476fd930a16323102e7087d8c080 Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Wed, 12 Oct 2022 23:47:23 -0400
Subject: [PATCH 030/202] Add thrust output iterator fix (1805) to thrust.patch
 (#11900)

Adds fix from https://github.com/NVIDIA/thrust/pull/1805 to libcudf's `thrust.patch`

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Mark Harris (https://github.com/harrism)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/11900
---
 cpp/cmake/thrust.patch | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/cpp/cmake/thrust.patch b/cpp/cmake/thrust.patch
index ae1962e4738..0dd9854d4aa 100644
--- a/cpp/cmake/thrust.patch
+++ b/cpp/cmake/thrust.patch
@@ -114,3 +114,29 @@ index d0e3f94..76774b0 100644
  /**
   * Dispatch between 32-bit and 64-bit index based versions of the same algorithm
   * implementation. This version allows using different token sequences for callables
+diff --git a/thrust/iterator/transform_input_output_iterator.h b/thrust/iterator/transform_input_output_iterator.h
+index f512a36..a5f725d 100644
+--- a/thrust/iterator/transform_input_output_iterator.h
++++ b/thrust/iterator/transform_input_output_iterator.h
+@@ -102,6 +102,8 @@ template <typename InputFunction, typename OutputFunction, typename Iterator>
+   /*! \endcond
+    */
+ 
++  transform_input_output_iterator() = default;
++
+   /*! This constructor takes as argument a \c Iterator an \c InputFunction and an
+    * \c OutputFunction and copies them to a new \p transform_input_output_iterator
+    *
+diff --git a/thrust/iterator/transform_output_iterator.h b/thrust/iterator/transform_output_iterator.h
+index 66fb46a..4a68cb5 100644
+--- a/thrust/iterator/transform_output_iterator.h
++++ b/thrust/iterator/transform_output_iterator.h
+@@ -104,6 +104,8 @@ template <typename UnaryFunction, typename OutputIterator>
+   /*! \endcond
+    */
+ 
++  transform_output_iterator() = default;
++
+   /*! This constructor takes as argument an \c OutputIterator and an \c
+    * UnaryFunction and copies them to a new \p transform_output_iterator
+    *

From 678946b52d60b96b673aef299fc2a1f36428df70 Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Thu, 13 Oct 2022 08:38:03 -0400
Subject: [PATCH 031/202] Fix segmented-sort to ignore indices outside the
 offsets (#11888)

Fixes `cudf::segmented_sorted_order` to ignore indices outside the specified offsets values.

The segmented-sort function in general sorts subsets of the input using a column of offsets (integers) to identify the position of each segment. Here is an example:
```
input    = { 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }
offsets1 = { 0,       3,          7,      10 }
```
There are 3 segments to sort: `[0,3)`, `[3,7)`, and `[7,10)`
Segment 1 sorts to `{ 7, 8, 9 }`
Segment 2 sorts to `{ 3, 4, 5, 6 }`
Segment 3 sorts to `{ 0, 1, 2 }`
The segmented-sort result is `{ 7, 8, 9, 3, 4, 5, 6, 0, 1, 2 }`

If the offsets do not cover all of the input, the segmented-sort should ignore any elements outside of the offsets.
```
input    = { 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }
offsets2 = {          3,          7       }
```
Here there is only 1 segment to sort: `[3,7) => { 3, 4, 5, 6 }`
The segmented-sort result is `{ 9, 8, 7, 3, 4, 5, 6, 2, 1, 0 }`
The values before the first offset and after the last offset should be left unchanged.

The gtests have been corrected to expect this behavior.
Also, the `SegmentedReductionTestUntyped.PartialSegmentReduction` gtest was improved to include offset gaps at the beginning and at the end to verify consistent behavior there as well.

Found while working on #11729

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - MithunR (https://github.com/mythrocks)
  - Mark Harris (https://github.com/harrism)

URL: https://github.com/rapidsai/cudf/pull/11888
---
 cpp/include/cudf/sorting.hpp                  | 50 ++++++++++++++++++-
 cpp/src/sort/segmented_sort.cu                | 48 +++++++++++++-----
 .../reductions/segmented_reduction_tests.cpp  |  9 ++--
 cpp/tests/sort/segmented_sort_tests.cpp       | 16 ++++--
 4 files changed, 100 insertions(+), 23 deletions(-)

diff --git a/cpp/include/cudf/sorting.hpp b/cpp/include/cudf/sorting.hpp
index cf21da1b030..f43089210fd 100644
--- a/cpp/include/cudf/sorting.hpp
+++ b/cpp/include/cudf/sorting.hpp
@@ -207,9 +207,31 @@ std::unique_ptr<column> rank(
 /**
  * @brief Returns sorted order after sorting each segment in the table.
  *
- * If segment_offsets contains values larger than number of rows, behavior is undefined.
+ * If segment_offsets contains values larger than the number of rows, the behavior is undefined.
  * @throws cudf::logic_error if `segment_offsets` is not `size_type` column.
  *
+ * @code{.pseudo}
+ * Example:
+ * keys = { {9, 8, 7, 6, 5, 4, 3, 2, 1, 0} }
+ * offsets = {0, 3, 7, 10}
+ * result = cudf::segmented_sorted_order(keys, offsets);
+ * result is { 2,1,0, 6,5,4,3, 9,8,7 }
+ * @endcode
+ *
+ * If segment_offsets is empty or contains a single index, no values are sorted
+ * and the result is a sequence of integers from 0 to keys.size()-1.
+ *
+ * The segment_offsets are not required to include all indices. Any indices
+ * outside the specified segments will not be sorted.
+ *
+ * @code{.pseudo}
+ * Example: (offsets do not cover all indices)
+ * keys = { {9, 8, 7, 6, 5, 4, 3, 2, 1, 0} }
+ * offsets = {3, 7}
+ * result = cudf::segmented_sorted_order(keys, offsets);
+ * result is { 0,1,2, 6,5,4,3, 7,8,9 }
+ * @endcode
+ *
  * @param keys The table that determines the ordering of elements in each segment
  * @param segment_offsets The column of `size_type` type containing start offset index for each
  * contiguous segment.
@@ -246,10 +268,34 @@ std::unique_ptr<column> stable_segmented_sorted_order(
 /**
  * @brief Performs a lexicographic segmented sort of a table
  *
- * If segment_offsets contains values larger than number of rows, behavior is undefined.
+ * If segment_offsets contains values larger than the number of rows, the behavior is undefined.
  * @throws cudf::logic_error if `values.num_rows() != keys.num_rows()`.
  * @throws cudf::logic_error if `segment_offsets` is not `size_type` column.
  *
+ * @code{.pseudo}
+ * Example:
+ * keys = { {9, 8, 7, 6, 5, 4, 3, 2, 1, 0} }
+ * values = { {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'} }
+ * offsets = {0, 3, 7, 10}
+ * result = cudf::segmented_sort_by_key(keys, values, offsets);
+ * result is { 'c','b','a', 'g','f','e','d', 'j','i','h' }
+ * @endcode
+ *
+ * If segment_offsets is empty or contains a single index, no values are sorted
+ * and the result is a copy of the values.
+ *
+ * The segment_offsets are not required to include all indices. Any indices
+ * outside the specified segments will not be sorted.
+ *
+ * @code{.pseudo}
+ * Example: (offsets do not cover all indices)
+ * keys = { {9, 8, 7, 6, 5, 4, 3, 2, 1, 0} }
+ * values = { {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'} }
+ * offsets = {3, 7}
+ * result = cudf::segmented_sort_by_key(keys, values, offsets);
+ * result is { 'a','b','c', 'g','f','e','d', 'h','i','j' }
+ * @endcode
+ *
  * @param values The table to reorder
  * @param keys The table that determines the ordering of elements in each segment
  * @param segment_offsets The column of `size_type` type containing start offset index for each
diff --git a/cpp/src/sort/segmented_sort.cu b/cpp/src/sort/segmented_sort.cu
index 3422330bf8b..c5f13df5305 100644
--- a/cpp/src/sort/segmented_sort.cu
+++ b/cpp/src/sort/segmented_sort.cu
@@ -24,7 +24,6 @@
 
 #include <thrust/binary_search.h>
 #include <thrust/iterator/counting_iterator.h>
-#include <thrust/iterator/transform_iterator.h>
 
 namespace cudf {
 namespace detail {
@@ -35,24 +34,49 @@ namespace {
  */
 enum class sort_method { STABLE, UNSTABLE };
 
-// returns segment indices for each element for all segments.
-// first segment begin index = 0, last segment end index = num_rows.
+/**
+ * @brief Builds indices to identify segments to sort
+ *
+ * The segments are added to the input table-view keys so they
+ * are lexicographically sorted within the segmented groups.
+ *
+ * ```
+ * Example 1:
+ * num_rows = 10
+ * offsets = {0, 3, 7, 10}
+ * segment-indices -> { 3,3,3, 7,7,7,7, 10,10,10 }
+ * ```
+ *
+ * ```
+ * Example 2: (offsets do not cover all indices)
+ * num_rows = 10
+ * offsets = {3, 7}
+ * segment-indices -> { 0,1,2, 7,7,7,7, 8,9,10 }
+ * ```
+ *
+ * @param num_rows Total number of rows in the input keys to sort
+ * @param offsets The offsets identifying the segments
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ */
 rmm::device_uvector<size_type> get_segment_indices(size_type num_rows,
                                                    column_view const& offsets,
                                                    rmm::cuda_stream_view stream)
 {
   rmm::device_uvector<size_type> segment_ids(num_rows, stream);
 
-  auto offset_begin = offsets.begin<size_type>();  // assumes already offset column contains offset.
-  auto offsets_minus_one = thrust::make_transform_iterator(
-    offset_begin, [offset_begin] __device__(auto i) { return i - 1; });
+  auto offset_begin  = offsets.begin<size_type>();
+  auto offset_end    = offsets.end<size_type>();
   auto counting_iter = thrust::make_counting_iterator<size_type>(0);
-  thrust::lower_bound(rmm::exec_policy(stream),
-                      offsets_minus_one,
-                      offsets_minus_one + offsets.size(),
-                      counting_iter,
-                      counting_iter + segment_ids.size(),
-                      segment_ids.begin());
+  thrust::transform(rmm::exec_policy(stream),
+                    counting_iter,
+                    counting_iter + segment_ids.size(),
+                    segment_ids.begin(),
+                    [offset_begin, offset_end] __device__(auto idx) {
+                      if (offset_begin == offset_end || idx < *offset_begin) { return idx; }
+                      if (idx >= *(offset_end - 1)) { return idx + 1; }
+                      return static_cast<size_type>(
+                        *thrust::upper_bound(thrust::seq, offset_begin, offset_end, idx));
+                    });
   return segment_ids;
 }
 
diff --git a/cpp/tests/reductions/segmented_reduction_tests.cpp b/cpp/tests/reductions/segmented_reduction_tests.cpp
index 4fd62f9b938..a8547ea982d 100644
--- a/cpp/tests/reductions/segmented_reduction_tests.cpp
+++ b/cpp/tests/reductions/segmented_reduction_tests.cpp
@@ -655,9 +655,9 @@ TEST_F(SegmentedReductionTestUntyped, PartialSegmentReduction)
 
   auto const input = fixed_width_column_wrapper<int32_t>{
     {1, 2, 3, 4, 5, 6, 7}, {true, true, true, true, true, true, true}};
-  auto const offsets   = std::vector<size_type>{0, 1, 3, 4};
+  auto const offsets   = std::vector<size_type>{1, 3, 4};
   auto const d_offsets = thrust::device_vector<size_type>(offsets);
-  auto const expect    = fixed_width_column_wrapper<int32_t>{{1, 5, 4}, {true, true, true}};
+  auto const expect    = fixed_width_column_wrapper<int32_t>{{5, 4}, {true, true}};
 
   auto res = segmented_reduce(input,
                               d_offsets,
@@ -669,7 +669,7 @@ TEST_F(SegmentedReductionTestUntyped, PartialSegmentReduction)
 
   // Test with initial value
   auto const init_scalar = cudf::make_fixed_width_scalar<int32_t>(3);
-  auto const init_expect = fixed_width_column_wrapper<int32_t>{{4, 8, 7}, {true, true, true}};
+  auto const init_expect = fixed_width_column_wrapper<int32_t>{{8, 7}, {true, true}};
 
   res = segmented_reduce(input,
                          d_offsets,
@@ -681,8 +681,7 @@ TEST_F(SegmentedReductionTestUntyped, PartialSegmentReduction)
 
   // Test with null initial value
   init_scalar->set_valid_async(false);
-  auto null_init_expect =
-    fixed_width_column_wrapper<int32_t>{{XXX, XXX, XXX}, {false, false, false}};
+  auto null_init_expect = fixed_width_column_wrapper<int32_t>{{XXX, XXX}, {false, false}};
 
   res = segmented_reduce(input,
                          d_offsets,
diff --git a/cpp/tests/sort/segmented_sort_tests.cpp b/cpp/tests/sort/segmented_sort_tests.cpp
index fb07bfde795..53642a89b3d 100644
--- a/cpp/tests/sort/segmented_sort_tests.cpp
+++ b/cpp/tests/sort/segmented_sort_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -201,9 +201,13 @@ TEST_F(SegmentedSortInt, NonZeroSegmentsStart)
   column_wrapper<int> segments1{{0,    2,       5,       8,     11}};
   column_wrapper<int> segments2{{      2,       5,       8,      11}};
   column_wrapper<int> segments3{{                  6,    8,      11}};
+  column_wrapper<int> segments4{{                  6,    8}};
+  column_wrapper<int> segments5{{0,       3,       6}};
   column_wrapper<int> expected1{{0, 1, 2, 4, 3, 7, 5, 6, 9, 10, 8}};
   column_wrapper<int> expected2{{0, 1, 2, 4, 3, 7, 5, 6, 9, 10, 8}};
-  column_wrapper<int> expected3{{2, 4, 5, 3, 0, 1, 7, 6, 9, 10, 8}};
+  column_wrapper<int> expected3{{0, 1, 2, 3, 4, 5, 7, 6, 9, 10, 8}};
+  column_wrapper<int> expected4{{0, 1, 2, 3, 4, 5, 7, 6, 8, 9, 10}};
+  column_wrapper<int> expected5{{2, 0, 1, 4, 5, 3, 6, 7, 8, 9, 10}};
   // clang-format on
   table_view input{{col1}};
   auto results = cudf::detail::segmented_sorted_order(input, segments1);
@@ -212,6 +216,10 @@ TEST_F(SegmentedSortInt, NonZeroSegmentsStart)
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected2);
   results = cudf::detail::segmented_sorted_order(input, segments3);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected3);
+  results = cudf::detail::segmented_sorted_order(input, segments4);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected4);
+  results = cudf::detail::segmented_sorted_order(input, segments5);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected5);
 }
 
 TEST_F(SegmentedSortInt, Sliced)
@@ -219,13 +227,13 @@ TEST_F(SegmentedSortInt, Sliced)
   using T = int;
   // clang-format off
   column_wrapper<T>        col1{{8, 9, 2, 3, 2, 2, 4, 1, 7, 5, 6}};
-  // sliced                      2, 2, 4, 1, 7, 5, 6
+  // sliced                                  2, 2, 4, 1, 7, 5, 6
   column_wrapper<int> segments1{{0,    2,       5}};
   column_wrapper<int> segments2{{-4,   0,      2,       5}};
   column_wrapper<int> segments3{{                 7}};
   column_wrapper<int> expected1{{0, 1, 3, 2, 4, 5, 6}};
   column_wrapper<int> expected2{{0, 1, 3, 2, 4, 5, 6}};
-  column_wrapper<int> expected3{{3, 0, 1, 2, 5, 6, 4}};
+  column_wrapper<int> expected3{{0, 1, 2, 3, 4, 5, 6}};
   // clang-format on
   auto slice = cudf::slice(col1, {4, 11})[0];  // 7 elements
   table_view input{{slice}};

From fb0922f9f8f5a14e8dbf0540a3b68eb059e04a35 Mon Sep 17 00:00:00 2001
From: nvdbaranec <56695930+nvdbaranec@users.noreply.github.com>
Date: Thu, 13 Oct 2022 09:24:59 -0500
Subject: [PATCH 032/202] Fix an issue reading struct-of-list types in Parquet.
 (#11910)

Fixes https://github.com/NVIDIA/spark-rapids/issues/6718

There was a bug introduced recently in https://github.com/rapidsai/cudf/pull/11752 where an insufficient check for whether an input column contained repetition information could cause incorrect results for column hierarchies with structs at the root.

Authors:
  - https://github.com/nvdbaranec

Approvers:
  - Jim Brennan (https://github.com/jbrennan333)
  - Nghia Truong (https://github.com/ttnghia)
  - Mike Wilson (https://github.com/hyperbolic2346)

URL: https://github.com/rapidsai/cudf/pull/11910
---
 cpp/src/io/parquet/page_data.cu    | 7 ++-----
 cpp/src/io/parquet/parquet_gpu.hpp | 7 +++++++
 cpp/src/io/parquet/reader_impl.cu  | 4 ++--
 cpp/tests/io/parquet_test.cpp      | 7 ++++++-
 4 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu
index a5f6d737637..57d55be6145 100644
--- a/cpp/src/io/parquet/page_data.cu
+++ b/cpp/src/io/parquet/page_data.cu
@@ -1860,11 +1860,8 @@ void PreprocessColumnData(hostdevice_vector<PageInfo>& pages,
         out_buf.create(size, stream, mr);
       }
 
-      // for nested hierarchies, compute per-page start offset.
-      // it would be better/safer to be checking (schema.max_repetition_level > 0) here, but there's
-      // no easy way to get at that info here. we'd have to move this function into reader_impl.cu
-      if ((out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) ||
-          out_buf.type.id() == type_id::LIST) {
+      // for nested hierarchies, compute per-page start offset
+      if (input_col.has_repetition) {
         thrust::exclusive_scan_by_key(rmm::exec_policy(stream),
                                       page_keys.begin(),
                                       page_keys.end(),
diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp
index 8f4cd5c6f3b..1a8c0f4cd9e 100644
--- a/cpp/src/io/parquet/parquet_gpu.hpp
+++ b/cpp/src/io/parquet/parquet_gpu.hpp
@@ -57,9 +57,16 @@ constexpr size_type MAX_DICT_SIZE = (1 << MAX_DICT_BITS) - 1;
 struct input_column_info {
   int schema_idx;
   std::string name;
+  bool has_repetition;
   // size == nesting depth. the associated real output
   // buffer index in the dest column for each level of nesting.
   std::vector<int> nesting;
+
+  input_column_info(int _schema_idx, std::string _name, bool _has_repetition)
+    : schema_idx(_schema_idx), name(_name), has_repetition(_has_repetition)
+  {
+  }
+
   auto nesting_depth() const { return nesting.size(); }
 };
 
diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu
index 07869189089..0997d2a968d 100644
--- a/cpp/src/io/parquet/reader_impl.cu
+++ b/cpp/src/io/parquet/reader_impl.cu
@@ -684,8 +684,8 @@ class aggregate_reader_metadata {
         // if I have no children, we're at a leaf and I'm an input column (that is, one with actual
         // data stored) so add me to the list.
         if (schema_elem.num_children == 0) {
-          input_column_info& input_col =
-            input_columns.emplace_back(input_column_info{schema_idx, schema_elem.name});
+          input_column_info& input_col = input_columns.emplace_back(
+            input_column_info{schema_idx, schema_elem.name, schema_elem.max_repetition_level > 0});
 
           // set up child output column for one-level encoding list
           if (schema_elem.is_one_level_list()) {
diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp
index 134eff54144..6f1c5ef7eb1 100644
--- a/cpp/tests/io/parquet_test.cpp
+++ b/cpp/tests/io/parquet_test.cpp
@@ -2633,6 +2633,11 @@ TEST_F(ParquetReaderTest, UserBoundsWithNullsMixedTypes)
     0, [string_per_row](cudf::size_type idx) { return idx * string_per_row; });
   cudf::test::fixed_width_column_wrapper<cudf::offset_type> offsets(offset_iter,
                                                                     offset_iter + num_rows + 1);
+
+  auto _c3_valids =
+    cudf::detail::make_counting_transform_iterator(0, [&](int index) { return index % 200; });
+  std::vector<bool> c3_valids(num_rows);
+  std::copy(_c3_valids, _c3_valids + num_rows, c3_valids.begin());
   auto _c3_list =
     cudf::make_lists_column(num_rows,
                             offsets.release(),
@@ -2646,7 +2651,7 @@ TEST_F(ParquetReaderTest, UserBoundsWithNullsMixedTypes)
   c3_children.push_back(std::move(c3_list));
   c3_children.push_back(c3_ints.release());
   c3_children.push_back(c3_floats.release());
-  cudf::test::structs_column_wrapper _c3(std::move(c3_children));
+  cudf::test::structs_column_wrapper _c3(std::move(c3_children), c3_valids);
   auto c3 = cudf::purge_nonempty_nulls(static_cast<cudf::structs_column_view>(_c3));
 
   // write it out

From 662f309b62b56472d63b6e981b514205b6eab999 Mon Sep 17 00:00:00 2001
From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com>
Date: Thu, 13 Oct 2022 23:35:53 +0530
Subject: [PATCH 033/202] Fixes Unsupported column type error due to empty list
 columns in Nested JSON reader (#11897)

Fixes `Unsupported column type` error during cudf column creation in Nested JSON reader due to empty list column.

During JSON tree creation, an empty list column does not have a `device_json_column` child because it does not have any rows or a type.
This PR fixes the issue by creating an empty column as the element child column. The list column still retains the null and empty-list information.

Authors:
  - Karthikeyan (https://github.com/karthikeyann)

Approvers:
  - Mike Wilson (https://github.com/hyperbolic2346)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/11897
---
 cpp/src/io/json/json_column.cu      | 21 +++++++++++++--------
 cpp/src/io/json/nested_json.hpp     |  3 +++
 cpp/src/io/json/nested_json_gpu.cu  | 23 ++++++++++++-----------
 cpp/tests/io/json_test.cpp          | 11 +++++++----
 cpp/tests/io/json_tree.cpp          |  6 +++++-
 python/cudf/cudf/tests/test_json.py | 18 ++++++++++++++++++
 6 files changed, 58 insertions(+), 24 deletions(-)

diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu
index d54bb5c8ea9..872e742a5af 100644
--- a/cpp/src/io/json/json_column.cu
+++ b/cpp/src/io/json/json_column.cu
@@ -403,7 +403,7 @@ void make_device_json_column(device_span<SymbolT const> input,
     std::string name   = "";
     auto parent_col_id = column_parent_ids[this_col_id];
     if (parent_col_id == parent_node_sentinel || column_categories[parent_col_id] == NC_LIST) {
-      name = "element";
+      name = list_child_name;
     } else if (column_categories[parent_col_id] == NC_FN) {
       auto field_name_col_id = parent_col_id;
       parent_col_id          = column_parent_ids[parent_col_id];
@@ -689,19 +689,24 @@ std::pair<std::unique_ptr<column>, std::vector<column_name_info>> device_json_co
       size_type num_rows = json_col.child_offsets.size() - 1;
       std::vector<column_name_info> column_names{};
       column_names.emplace_back("offsets");
-      column_names.emplace_back(json_col.child_columns.begin()->first);
+      column_names.emplace_back(
+        json_col.child_columns.empty() ? list_child_name : json_col.child_columns.begin()->first);
 
       // Note: json_col modified here, reuse the memory
       auto offsets_column = std::make_unique<column>(
         data_type{type_id::INT32}, num_rows + 1, json_col.child_offsets.release());
       // Create children column
       auto [child_column, names] =
-        device_json_column_to_cudf_column(json_col.child_columns.begin()->second,
-                                          d_input,
-                                          options,
-                                          get_child_schema(json_col.child_columns.begin()->first),
-                                          stream,
-                                          mr);
+        json_col.child_columns.empty()
+          ? std::pair<std::unique_ptr<column>,
+                      std::vector<column_name_info>>{std::make_unique<column>(), {}}
+          : device_json_column_to_cudf_column(
+              json_col.child_columns.begin()->second,
+              d_input,
+              options,
+              get_child_schema(json_col.child_columns.begin()->first),
+              stream,
+              mr);
       column_names.back().children      = names;
       auto [result_bitmask, null_count] = make_validity(json_col);
       return {make_lists_column(num_rows,
diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp
index 10d209b2ea6..8a0f3566d58 100644
--- a/cpp/src/io/json/nested_json.hpp
+++ b/cpp/src/io/json/nested_json.hpp
@@ -104,6 +104,9 @@ enum node_t : NodeT {
  */
 enum class json_col_t : char { ListColumn, StructColumn, StringColumn, Unknown };
 
+// Default name for a list's child column
+constexpr auto list_child_name{"element"};
+
 /**
  * @brief Intermediate representation of data from a nested JSON input
  */
diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu
index 5d60a564b9b..29a29a1f9d5 100644
--- a/cpp/src/io/json/nested_json_gpu.cu
+++ b/cpp/src/io/json/nested_json_gpu.cu
@@ -1162,9 +1162,6 @@ void make_json_column(json_column& root_column,
   // Range of encapsulating function that parses to internal columnar data representation
   CUDF_FUNC_RANGE();
 
-  // Default name for a list's child column
-  std::string const list_child_name = "element";
-
   // Parse the JSON and get the token stream
   const auto [d_tokens_gpu, d_token_indices_gpu] = get_token_stream(d_input, options, stream, mr);
 
@@ -1286,7 +1283,7 @@ void make_json_column(json_column& root_column,
    * (b) a list, the selected child column corresponds to single child column of
    * the list column. In this case, the child column may not exist yet.
    */
-  auto get_selected_column = [&list_child_name](std::stack<tree_node>& current_data_path) {
+  auto get_selected_column = [](std::stack<tree_node>& current_data_path) {
     json_column* selected_col = current_data_path.top().current_selected_col;
 
     // If the node does not have a selected column yet
@@ -1680,7 +1677,8 @@ std::pair<std::unique_ptr<column>, std::vector<column_name_info>> json_column_to
       size_type num_rows = json_col.child_offsets.size();
       std::vector<column_name_info> column_names{};
       column_names.emplace_back("offsets");
-      column_names.emplace_back(json_col.child_columns.begin()->first);
+      column_names.emplace_back(
+        json_col.child_columns.empty() ? list_child_name : json_col.child_columns.begin()->first);
 
       rmm::device_uvector<json_column::row_offset_t> d_offsets =
         cudf::detail::make_device_uvector_async(json_col.child_offsets, stream, mr);
@@ -1688,12 +1686,15 @@ std::pair<std::unique_ptr<column>, std::vector<column_name_info>> json_column_to
         std::make_unique<column>(data_type{type_id::INT32}, num_rows, d_offsets.release());
       // Create children column
       auto [child_column, names] =
-        json_column_to_cudf_column(json_col.child_columns.begin()->second,
-                                   d_input,
-                                   options,
-                                   get_child_schema(json_col.child_columns.begin()->first),
-                                   stream,
-                                   mr);
+        json_col.child_columns.empty()
+          ? std::pair<std::unique_ptr<column>,
+                      std::vector<column_name_info>>{std::make_unique<column>(), {}}
+          : json_column_to_cudf_column(json_col.child_columns.begin()->second,
+                                       d_input,
+                                       options,
+                                       get_child_schema(json_col.child_columns.begin()->first),
+                                       stream,
+                                       mr);
       column_names.back().children      = names;
       auto [result_bitmask, null_count] = make_validity(json_col);
       return {make_lists_column(num_rows - 1,
diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json_test.cpp
index d7ab881861a..b8cd4622484 100644
--- a/cpp/tests/io/json_test.cpp
+++ b/cpp/tests/io/json_test.cpp
@@ -813,7 +813,6 @@ TEST_P(JsonReaderDualTest, JsonLinesObjectsOutOfOrder)
                                  cudf::test::strings_column_wrapper({"aaa", "bbb"}));
 }
 
-/*
 // currently, the json reader is strict about having non-empty input.
 TEST_F(JsonReaderTest, EmptyFile)
 {
@@ -824,7 +823,9 @@ TEST_F(JsonReaderTest, EmptyFile)
   }
 
   cudf::io::json_reader_options in_options =
-    cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}).lines(true);
+    cudf::io::json_reader_options::builder(cudf::io::source_info{filepath})
+      .lines(true)
+      .experimental(true);
   auto result = cudf::io::read_json(in_options);
 
   const auto view = result.tbl->view();
@@ -832,6 +833,7 @@ TEST_F(JsonReaderTest, EmptyFile)
 }
 
 // currently, the json reader is strict about having non-empty input.
+// experimental reader supports empty input
 TEST_F(JsonReaderTest, NoDataFile)
 {
   auto filepath = temp_env->get_temp_dir() + "NoDataFile.csv";
@@ -841,13 +843,14 @@ TEST_F(JsonReaderTest, NoDataFile)
   }
 
   cudf::io::json_reader_options in_options =
-    cudf::io::json_reader_options::builder(cudf::io::source_info{filepath}).lines(true);
+    cudf::io::json_reader_options::builder(cudf::io::source_info{filepath})
+      .lines(true)
+      .experimental(true);
   cudf::io::table_with_metadata result = cudf::io::read_json(in_options);
 
   const auto view = result.tbl->view();
   EXPECT_EQ(0, view.num_columns());
 }
-*/
 
 TEST_F(JsonReaderTest, ArrowFileSource)
 {
diff --git a/cpp/tests/io/json_tree.cpp b/cpp/tests/io/json_tree.cpp
index 3d024fe8af8..6f7e28a2ca3 100644
--- a/cpp/tests/io/json_tree.cpp
+++ b/cpp/tests/io/json_tree.cpp
@@ -773,7 +773,11 @@ std::vector<std::string> json_lines_list = {
  { "a": { "y" : 6, "z": [] }}
  { "a": { "y" : 6, "z": [2, 3, 4, 5] }}
  { "a": { "z": [4], "y" : 6 }}
- { "a" : { "x" : 8, "y": 9 }, "b" : {"x": 10 , "z": 11 }} )"};
+ { "a" : { "x" : 8, "y": 9 }, "b" : {"x": 10 , "z": 11 }} )",
+  // empty list, row.
+  R"( {"a" : [], "b" : {}}
+ {"a" : []}
+ {"b" : {}})"};
 INSTANTIATE_TEST_SUITE_P(Mixed_And_Records,
                          JsonTreeTraversalTest,
                          ::testing::Combine(::testing::Values(false),
diff --git a/python/cudf/cudf/tests/test_json.py b/python/cudf/cudf/tests/test_json.py
index 1fdef44546a..fb2c24b3757 100644
--- a/python/cudf/cudf/tests/test_json.py
+++ b/python/cudf/cudf/tests/test_json.py
@@ -649,6 +649,24 @@ def test_json_nested_data():
     assert df.to_arrow().equals(pa_table_pdf)
 
 
+def test_json_empty_types():
+    json_str = """ {}
+    {"a": [], "b": {}}
+    {"a": []}
+    {"b": {}}
+    {"c": {"d": []}}
+    {"e": [{}]}
+    """
+    df = cudf.read_json(
+        StringIO(json_str),
+        engine="cudf_experimental",
+        orient="records",
+        lines=True,
+    )
+    pdf = pd.read_json(StringIO(json_str), orient="records", lines=True)
+    assert_eq(df, pdf)
+
+
 def test_json_types_data():
     # 0:<0:string,1:float>
     # 1:list<int>

From c824fee8181d06ba1c05a5de4d4ebc0a52027753 Mon Sep 17 00:00:00 2001
From: Gregory Kimball <gregory.kimball@sunpowercorp.com>
Date: Thu, 13 Oct 2022 16:21:09 -0700
Subject: [PATCH 034/202] Add clear indication of non-GPU accelerated
 parameters in read_json docstring (#11825)

This PR moves the "pandas engine only" arguments to the end of the optional argument list of the docstring.

This is what an `admonition` will look like:
<img width="592" alt="Screen Shot 2022-10-11 at 12 06 50 PM" src="https://user-images.githubusercontent.com/11664259/195161106-71a1ec40-7e1b-4297-b6d9-67ff3a5aacc7.png">

Authors:
  - Gregory Kimball (https://github.com/GregoryKimball)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Lawrence Mitchell (https://github.com/wence-)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/11825
---
 python/cudf/cudf/utils/ioutils.py | 80 ++++++++++++++++++++++++++-----
 1 file changed, 68 insertions(+), 12 deletions(-)

diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py
index 366b2e0ebae..0a0647f1297 100644
--- a/python/cudf/cudf/utils/ioutils.py
+++ b/python/cudf/cudf/utils/ioutils.py
@@ -451,7 +451,7 @@
 """
 doc_to_orc = docfmt_partial(docstring=_docstring_to_orc)
 
-_docstring_read_json = """
+_docstring_read_json = r"""
 Load a JSON dataset into a DataFrame
 
 Parameters
@@ -466,8 +466,13 @@
 engine : {{ 'auto', 'cudf', 'cudf_experimental', 'pandas' }}, default 'auto'
     Parser engine to use. If 'auto' is passed, the engine will be
     automatically selected based on the other parameters.
-orient : string,
-    Indication of expected JSON string format (pandas engine only).
+orient : string
+
+    .. admonition:: Not GPU-accelerated
+
+       This parameter is only supported with ``engine='pandas'``.
+
+    Indication of expected JSON string format.
     Compatible JSON strings can be produced by ``to_json()`` with a
     corresponding orient value.
     The set of possible orients is:
@@ -500,12 +505,23 @@
 typ : type of object to recover (series or frame), default 'frame'
     With cudf engine, only frame output is supported.
 dtype : boolean or dict, default True
-    If True, infer dtypes, if a dict of column to dtype, then use those,
-    if False, then don't infer dtypes at all, applies only to the data.
+    If True, infer dtypes for all columns; if False, then don't infer dtypes at all,
+    if a dict, provide a mapping from column names to their respective dtype (any missing
+    columns will have their dtype inferred). Applies only to the data.
 convert_axes : boolean, default True
-    Try to convert the axes to the proper dtypes (pandas engine only).
+
+    .. admonition:: Not GPU-accelerated
+
+       This parameter is only supported with ``engine='pandas'``.
+
+    Try to convert the axes to the proper dtypes.
 convert_dates : boolean, default True
-    List of columns to parse for dates (pandas engine only); If True, then try
+
+    .. admonition:: Not GPU-accelerated
+
+       This parameter is only supported with ``engine='pandas'``.
+
+    List of columns to parse for dates; If True, then try
     to parse datelike columns default is True; a column label is datelike if
 
     * it ends with ``'_at'``,
@@ -514,27 +530,57 @@
     * it is ``'modified'``, or
     * it is ``'date'``
 keep_default_dates : boolean, default True
-    If parsing dates, parse the default datelike columns (pandas engine only)
+
+    .. admonition:: Not GPU-accelerated
+
+       This parameter is only supported with ``engine='pandas'``.
+
+    If parsing dates, parse the default datelike columns.
 numpy : boolean, default False
-    Direct decoding to numpy arrays (pandas engine only). Supports numeric
+
+    .. admonition:: Not GPU-accelerated
+
+       This parameter is only supported with ``engine='pandas'``.
+
+    Direct decoding to numpy arrays. Supports numeric
     data only, but non-numeric column and index labels are supported. Note
     also that the JSON ordering MUST be the same for each term if numpy=True.
 precise_float : boolean, default False
+
+    .. admonition:: Not GPU-accelerated
+
+       This parameter is only supported with ``engine='pandas'``.
+
     Set to enable usage of higher precision (strtod) function when
     decoding string to double values (pandas engine only). Default (False)
     is to use fast but less precise builtin functionality
 date_unit : string, default None
-    The timestamp unit to detect if converting dates (pandas engine only).
+
+    .. admonition:: Not GPU-accelerated
+
+       This parameter is only supported with ``engine='pandas'``.
+
+    The timestamp unit to detect if converting dates.
     The default behavior is to try and detect the correct precision, but if
     this is not desired then pass one of 's', 'ms', 'us' or 'ns' to force
     parsing only seconds, milliseconds, microseconds or nanoseconds.
 encoding : str, default is 'utf-8'
+
+    .. admonition:: Not GPU-accelerated
+
+       This parameter is only supported with ``engine='pandas'``.
+
     The encoding to use to decode py3 bytes.
     With cudf engine, only utf-8 is supported.
 lines : boolean, default False
     Read the file as a json object per line.
 chunksize : integer, default None
-    Return JsonReader object for iteration (pandas engine only).
+
+    .. admonition:: Not GPU-accelerated
+
+       This parameter is only supported with ``engine='pandas'``.
+
+    Return JsonReader object for iteration.
     See the `line-delimited json docs
     <http://pandas.pydata.org/pandas-docs/stable/io.html#io-jsonl>`_
     for more information on ``chunksize``.
@@ -547,12 +593,22 @@
     otherwise. If using 'zip', the ZIP file must contain only one data
     file to be read in. Set to None for no decompression.
 byte_range : list or tuple, default None
-    Byte range within the input file to be read (cudf engine only).
+
+    .. admonition:: GPU-accelerated
+
+       This parameter is only supported with ``engine='cudf'``.
+
+    Byte range within the input file to be read.
     The first number is the offset in bytes, the second number is the range
     size in bytes. Set the size to zero to read all data after the offset
     location. Reads the row that starts before or at the end of the range,
     even if it ends after the end of the range.
 keep_quotes : bool, default False
+
+    .. admonition:: GPU-accelerated experimental feature
+
+       This parameter is only supported with ``engine='cudf_experimental'``.
+
     This parameter is only supported in ``cudf_experimental`` engine.
     If `True`, any string values are read literally (and wrapped in an
     additional set of quotes).

From e91d7d9ef1eb3128de99f78d0127050bb12110d4 Mon Sep 17 00:00:00 2001
From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com>
Date: Fri, 14 Oct 2022 14:22:41 +0530
Subject: [PATCH 035/202] Reduce memory usage in nested JSON parser - tree
 generation (#11864)

Reduces Memory usage by 53% in nested JSON parser tree generation algorithm.
1GB JSON takes 8.469 GiB instead of 16.957 GiB. All values below are for 1 GB JSON text input.

This PR employs the following optimisations to reduce memory usage:
 - Modified to generate parent node ids from nodes instead of tokens. (16.957 GiB -> 10.957 GiB)
 - Reordered node_range, node_categories generation to the end. (10.957 GiB -> 9.774 GiB)
 - Scope limited token_levels (9.774 GiB -> 9.403 GiB)
 - Used CUB sort instead of `thrust::stable_sort_by_key` (9.403 GiB  -> 8.487 GiB)
 - Used `cub::DoubleBuffer` which eliminates copy of order. (8.487 GiB -> 7.97 GiB)

The peak memory is reduced by 53%, while parsing bandwidth remains the same (1.6 GB/s on a GV100 for 1 GB JSON).

Since `get_stack_context` in the JSON parser has the highest memory usage (8.469 GiB), peak memory is no longer influenced by the JSON tree generation step. Peak memory is now 50% of that of the earlier code.

Authors:
  - Karthikeyan (https://github.com/karthikeyann)

Approvers:
  - Tobias Ribizel (https://github.com/upsj)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/11864
---
 cpp/src/io/json/json_tree.cu | 235 ++++++++++++++++++++++-------------
 1 file changed, 150 insertions(+), 85 deletions(-)

diff --git a/cpp/src/io/json/json_tree.cu b/cpp/src/io/json/json_tree.cu
index dbf026c351e..cf041b02a20 100644
--- a/cpp/src/io/json/json_tree.cu
+++ b/cpp/src/io/json/json_tree.cu
@@ -29,6 +29,8 @@
 
 #include <cuco/static_map.cuh>
 
+#include <cub/device/device_radix_sort.cuh>
+
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_uvector.hpp>
 #include <rmm/exec_policy.hpp>
@@ -39,6 +41,7 @@
 #include <thrust/fill.h>
 #include <thrust/gather.h>
 #include <thrust/iterator/counting_iterator.h>
+#include <thrust/iterator/permutation_iterator.h>
 #include <thrust/iterator/transform_output_iterator.h>
 #include <thrust/iterator/zip_iterator.h>
 #include <thrust/reduce.h>
@@ -125,6 +128,75 @@ struct node_ranges {
   }
 };
 
+/**
+ * @brief Returns the stably sorted keys together with the order (indices) that sorts them.
+ *
+ * Uses cub's stable radix sort. The order sequence is generated internally, and
+ * `cub::DoubleBuffer` lets the buffers holding the result be returned without an extra copy.
+ *
+ * @tparam IndexType sorted order type
+ * @tparam KeyType key type
+ * @param keys keys to sort
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ * @return Sorted keys and indices producing that sorted order
+ */
+template <typename IndexType = size_t, typename KeyType>
+std::pair<rmm::device_uvector<KeyType>, rmm::device_uvector<IndexType>> stable_sorted_key_order(
+  cudf::device_span<KeyType const> keys, rmm::cuda_stream_view stream)
+{
+  CUDF_FUNC_RANGE();
+  // Ping-pong key/order buffers; cub may leave the sorted result in either half of a pair.
+  rmm::device_uvector<KeyType> keys_buffer1(keys.size(), stream);
+  rmm::device_uvector<KeyType> keys_buffer2(keys.size(), stream);
+  rmm::device_uvector<IndexType> order_buffer1(keys.size(), stream);
+  rmm::device_uvector<IndexType> order_buffer2(keys.size(), stream);
+  cub::DoubleBuffer<IndexType> order_buffer(order_buffer1.data(), order_buffer2.data());
+  cub::DoubleBuffer<KeyType> keys_buffer(keys_buffer1.data(), keys_buffer2.data());
+  // The bit range is spelled out only so `stream` can be passed; cub defaults to stream 0.
+  int constexpr end_bit     = sizeof(KeyType) * 8;
+  size_t temp_storage_bytes = 0;
+  cub::DeviceRadixSort::SortPairs(nullptr, temp_storage_bytes, keys_buffer, order_buffer,
+                                  keys.size(), 0, end_bit, stream.value());
+  rmm::device_buffer d_temp_storage(temp_storage_bytes, stream);
+
+  thrust::copy(rmm::exec_policy(stream), keys.begin(), keys.end(), keys_buffer1.begin());
+  thrust::sequence(rmm::exec_policy(stream), order_buffer1.begin(), order_buffer1.end());
+  // Enqueue the sort on `stream` so that it is ordered after the copy/sequence above.
+  cub::DeviceRadixSort::SortPairs(d_temp_storage.data(), temp_storage_bytes, keys_buffer,
+                                  order_buffer, keys.size(), 0, end_bit, stream.value());
+
+  return std::pair{keys_buffer.Current() == keys_buffer1.data() ? std::move(keys_buffer1)
+                                                                : std::move(keys_buffer2),
+                   order_buffer.Current() == order_buffer1.data() ? std::move(order_buffer1)
+                                                                  : std::move(order_buffer2)};
+}
+
+/**
+ * @brief Propagate parent node to siblings from first sibling, updating the ids in place.
+ *
+ * @param node_levels Node levels of each node
+ * @param[in,out] parent_node_ids Parent node ids. On input, set for the first child of each push
+ * node and -1 for other siblings; on return, the running maximum of the ids within each level.
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ */
+void propagate_parent_to_siblings(cudf::device_span<TreeDepthT const> node_levels,
+                                  cudf::device_span<NodeIndexT> parent_node_ids,
+                                  rmm::cuda_stream_view stream)
+{
+  CUDF_FUNC_RANGE();
+  auto [sorted_node_levels, sorted_order] = stable_sorted_key_order<size_type>(node_levels, stream);
+  // Max-scan the parent ids in level-sorted order, reading and writing the original array
+  // through permutation iterators rather than a gather (~17% faster).
+  thrust::inclusive_scan_by_key(
+    rmm::exec_policy(stream),
+    sorted_node_levels.begin(),
+    sorted_node_levels.end(),
+    thrust::make_permutation_iterator(parent_node_ids.begin(), sorted_order.begin()),
+    thrust::make_permutation_iterator(parent_node_ids.begin(), sorted_order.begin()),
+    thrust::equal_to<TreeDepthT>{},
+    thrust::maximum<NodeIndexT>{});
+}
+
 // Generates a tree representation of the given tokens, token_indices.
 tree_meta_t get_tree_representation(device_span<PdaTokenT const> tokens,
                                     device_span<SymbolOffsetT const> token_indices,
@@ -166,12 +238,86 @@ tree_meta_t get_tree_representation(device_span<PdaTokenT const> tokens,
   };
 
   auto num_tokens = tokens.size();
-  auto is_node_it = thrust::make_transform_iterator(
-    tokens.begin(),
-    [is_node] __device__(auto t) -> size_type { return static_cast<size_type>(is_node(t)); });
-  auto num_nodes = thrust::count_if(
+  auto num_nodes  = thrust::count_if(
     rmm::exec_policy(stream), tokens.begin(), tokens.begin() + num_tokens, is_node);
 
+  // Node levels: transform_exclusive_scan, copy_if.
+  rmm::device_uvector<TreeDepthT> node_levels(num_nodes, stream, mr);
+  {
+    rmm::device_uvector<TreeDepthT> token_levels(num_tokens, stream);
+    auto push_pop_it = thrust::make_transform_iterator(
+      tokens.begin(), [does_push, does_pop] __device__(PdaTokenT const token) -> size_type {
+        return does_push(token) - does_pop(token);
+      });
+    thrust::exclusive_scan(
+      rmm::exec_policy(stream), push_pop_it, push_pop_it + num_tokens, token_levels.begin());
+
+    auto node_levels_end = thrust::copy_if(rmm::exec_policy(stream),
+                                           token_levels.begin(),
+                                           token_levels.begin() + num_tokens,
+                                           tokens.begin(),
+                                           node_levels.begin(),
+                                           is_node);
+    CUDF_EXPECTS(thrust::distance(node_levels.begin(), node_levels_end) == num_nodes,
+                 "node level count mismatch");
+  }
+
+  // Node parent ids:
+  // previous push node_id transform, stable sort by level, segmented scan with Max, reorder.
+  rmm::device_uvector<NodeIndexT> parent_node_ids(num_nodes, stream, mr);
+  // This block of code is generalized logical stack algorithm. TODO: make this a separate function.
+  {
+    rmm::device_uvector<NodeIndexT> node_token_ids(num_nodes, stream);
+    thrust::copy_if(rmm::exec_policy(stream),
+                    thrust::make_counting_iterator<NodeIndexT>(0),
+                    thrust::make_counting_iterator<NodeIndexT>(0) + num_tokens,
+                    tokens.begin(),
+                    node_token_ids.begin(),
+                    is_node);
+
+    // previous push node_id
+    // if previous node is a push, then i-1
+    // if previous node is FE, then i-2 (returns FB's index)
+    // if previous node is SMB and its previous node is a push, then i-2
+    // eg. `{ SMB FB FE VB VE SME` -> `{` index as FB's parent.
+    // else -1
+    auto first_childs_parent_token_id = [tokens_gpu =
+                                           tokens.begin()] __device__(auto i) -> NodeIndexT {
+      if (i <= 0) { return -1; }
+      if (tokens_gpu[i - 1] == token_t::StructBegin or tokens_gpu[i - 1] == token_t::ListBegin) {
+        return i - 1;
+      } else if (tokens_gpu[i - 1] == token_t::FieldNameEnd) {
+        return i - 2;
+      } else if (tokens_gpu[i - 1] == token_t::StructMemberBegin and
+                 (tokens_gpu[i - 2] == token_t::StructBegin ||
+                  tokens_gpu[i - 2] == token_t::ListBegin)) {
+        return i - 2;
+      } else {
+        return -1;
+      }
+    };
+
+    thrust::transform(
+      rmm::exec_policy(stream),
+      node_token_ids.begin(),
+      node_token_ids.end(),
+      parent_node_ids.begin(),
+      [node_ids_gpu = node_token_ids.begin(), num_nodes, first_childs_parent_token_id] __device__(
+        NodeIndexT const tid) -> NodeIndexT {
+        auto pid = first_childs_parent_token_id(tid);
+        return pid < 0
+                 ? parent_node_sentinel
+                 : thrust::lower_bound(thrust::seq, node_ids_gpu, node_ids_gpu + num_nodes, pid) -
+                     node_ids_gpu;
+        // parent_node_sentinel is -1, useful for segmented max operation below
+      });
+  }
+  // Propagate parent node to siblings from first sibling - inplace.
+  propagate_parent_to_siblings(
+    cudf::device_span<TreeDepthT const>{node_levels.data(), node_levels.size()},
+    parent_node_ids,
+    stream);
+
   // Node categories: copy_if with transform.
   rmm::device_uvector<NodeT> node_categories(num_nodes, stream, mr);
   auto node_categories_it =
@@ -184,24 +330,6 @@ tree_meta_t get_tree_representation(device_span<PdaTokenT const> tokens,
   CUDF_EXPECTS(node_categories_end - node_categories_it == num_nodes,
                "node category count mismatch");
 
-  // Node levels: transform_exclusive_scan, copy_if.
-  rmm::device_uvector<TreeDepthT> token_levels(num_tokens, stream);
-  auto push_pop_it = thrust::make_transform_iterator(
-    tokens.begin(), [does_push, does_pop] __device__(PdaTokenT const token) -> size_type {
-      return does_push(token) - does_pop(token);
-    });
-  thrust::exclusive_scan(
-    rmm::exec_policy(stream), push_pop_it, push_pop_it + num_tokens, token_levels.begin());
-
-  rmm::device_uvector<TreeDepthT> node_levels(num_nodes, stream, mr);
-  auto node_levels_end = thrust::copy_if(rmm::exec_policy(stream),
-                                         token_levels.begin(),
-                                         token_levels.begin() + num_tokens,
-                                         tokens.begin(),
-                                         node_levels.begin(),
-                                         is_node);
-  CUDF_EXPECTS(node_levels_end - node_levels.begin() == num_nodes, "node level count mismatch");
-
   // Node ranges: copy_if with transform.
   rmm::device_uvector<SymbolOffsetT> node_range_begin(num_nodes, stream, mr);
   rmm::device_uvector<SymbolOffsetT> node_range_end(num_nodes, stream, mr);
@@ -223,69 +351,6 @@ tree_meta_t get_tree_representation(device_span<PdaTokenT const> tokens,
                     });
   CUDF_EXPECTS(node_range_out_end - node_range_out_it == num_nodes, "node range count mismatch");
 
-  // Node parent ids: previous push token_id transform, stable sort, segmented scan with Max,
-  // reorder, copy_if. This one is sort of logical stack. But more generalized.
-  // TODO: make it own function.
-  rmm::device_uvector<size_type> parent_token_ids(num_tokens, stream);
-  rmm::device_uvector<size_type> initial_order(num_tokens, stream);
-  // TODO re-write the algorithm to work only on nodes, not tokens.
-
-  thrust::sequence(rmm::exec_policy(stream), initial_order.begin(), initial_order.end());
-  thrust::tabulate(rmm::exec_policy(stream),
-                   parent_token_ids.begin(),
-                   parent_token_ids.end(),
-                   [does_push, tokens_gpu = tokens.begin()] __device__(auto i) -> size_type {
-                     return (i > 0) && does_push(tokens_gpu[i - 1]) ? i - 1 : -1;
-                     // -1, not sentinel used here because of max operation below
-                   });
-
-  auto out_pid = thrust::make_zip_iterator(parent_token_ids.data(), initial_order.data());
-  // Uses radix sort for builtin types.
-  thrust::stable_sort_by_key(rmm::exec_policy(stream),
-                             token_levels.data(),
-                             token_levels.data() + token_levels.size(),
-                             out_pid);
-
-  // SegmentedScan Max.
-  thrust::inclusive_scan_by_key(rmm::exec_policy(stream),
-                                token_levels.data(),
-                                token_levels.data() + token_levels.size(),
-                                parent_token_ids.data(),
-                                parent_token_ids.data(),
-                                thrust::equal_to<size_type>{},
-                                thrust::maximum<size_type>{});
-
-  // scatter to restore the original order.
-  {
-    rmm::device_uvector<size_type> temp_storage(num_tokens, stream);
-    thrust::scatter(rmm::exec_policy(stream),
-                    parent_token_ids.begin(),
-                    parent_token_ids.end(),
-                    initial_order.begin(),
-                    temp_storage.begin());
-    thrust::copy(
-      rmm::exec_policy(stream), temp_storage.begin(), temp_storage.end(), parent_token_ids.begin());
-  }
-
-  rmm::device_uvector<size_type> node_ids_gpu(num_tokens, stream);
-  thrust::exclusive_scan(
-    rmm::exec_policy(stream), is_node_it, is_node_it + num_tokens, node_ids_gpu.begin());
-
-  rmm::device_uvector<NodeIndexT> parent_node_ids(num_nodes, stream, mr);
-  auto parent_node_ids_it = thrust::make_transform_iterator(
-    parent_token_ids.begin(),
-    [node_ids_gpu = node_ids_gpu.begin()] __device__(size_type const pid) -> NodeIndexT {
-      return pid < 0 ? parent_node_sentinel : node_ids_gpu[pid];
-    });
-  auto parent_node_ids_end = thrust::copy_if(rmm::exec_policy(stream),
-                                             parent_node_ids_it,
-                                             parent_node_ids_it + parent_token_ids.size(),
-                                             tokens.begin(),
-                                             parent_node_ids.begin(),
-                                             is_node);
-  CUDF_EXPECTS(parent_node_ids_end - parent_node_ids.begin() == num_nodes,
-               "parent node id gather mismatch");
-
   return {std::move(node_categories),
           std::move(parent_node_ids),
           std::move(node_levels),

From 8a31e26b420afa7ea7aa0255a8fca002f2f47fd5 Mon Sep 17 00:00:00 2001
From: Tobias Ribizel <tribizel@nvidia.com>
Date: Fri, 14 Oct 2022 19:05:21 +0200
Subject: [PATCH 036/202] Fix local offset handling in bgzip reader (#11918)

We accidentally checked the local offset against the compressed, not the uncompressed size. The new test failed prior to fixing the behavior.

Authors:
  - Tobias Ribizel (https://github.com/upsj)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Bradley Dice (https://github.com/bdice)
  - Karthikeyan (https://github.com/karthikeyann)

URL: https://github.com/rapidsai/cudf/pull/11918
---
 cpp/src/io/text/bgzip_data_chunk_source.cu   |  4 +-
 cpp/tests/io/text/data_chunk_source_test.cpp | 72 ++++++++++++--------
 2 files changed, 47 insertions(+), 29 deletions(-)

diff --git a/cpp/src/io/text/bgzip_data_chunk_source.cu b/cpp/src/io/text/bgzip_data_chunk_source.cu
index 9c4ff218783..e4b6bad614d 100644
--- a/cpp/src/io/text/bgzip_data_chunk_source.cu
+++ b/cpp/src/io/text/bgzip_data_chunk_source.cu
@@ -271,8 +271,8 @@ class bgzip_data_chunk_reader : public data_chunk_reader {
     // seek to the beginning of the provided local offset
     auto const local_pos = virtual_begin & 0xFFFFu;
     if (local_pos > 0) {
-      CUDF_EXPECTS(_curr_blocks.h_compressed_offsets.size() > 1 &&
-                     local_pos < _curr_blocks.h_compressed_offsets[1],
+      CUDF_EXPECTS(_curr_blocks.h_decompressed_offsets.size() > 1 &&
+                     local_pos < _curr_blocks.h_decompressed_offsets[1],
                    "local part of virtual offset is out of bounds");
       _curr_blocks.consume_bytes(local_pos);
     }
diff --git a/cpp/tests/io/text/data_chunk_source_test.cpp b/cpp/tests/io/text/data_chunk_source_test.cpp
index 7cb75aea8e2..2111d66a066 100644
--- a/cpp/tests/io/text/data_chunk_source_test.cpp
+++ b/cpp/tests/io/text/data_chunk_source_test.cpp
@@ -130,6 +130,11 @@ enum class compression { ENABLED, DISABLED };
 
 enum class eof { ADD_EOF_BLOCK, NO_EOF_BLOCK };
 
+uint64_t virtual_offset(std::size_t block_offset, std::size_t local_offset)
+{
+  return (block_offset << 16) | local_offset;  // assumes local_offset < 2^16 (fits the low bits)
+}
+
 void write_bgzip(std::ostream& output_stream,
                  cudf::host_span<const char> data,
                  std::default_random_engine& rng,
@@ -193,6 +198,7 @@ TEST_F(DataChunkSourceTest, BgzipSource)
 {
   auto const filename = temp_env->get_temp_filepath("bgzip_source");
   std::string input{"bananarama"};
+  input.reserve(input.size() << 25);
   for (int i = 0; i < 24; i++) {
     input = input + input;
   }
@@ -211,13 +217,11 @@ TEST_F(DataChunkSourceTest, BgzipSourceVirtualOffsets)
 {
   auto const filename = temp_env->get_temp_filepath("bgzip_source_offsets");
   std::string input{"bananarama"};
+  input.reserve(input.size() << 25);
   for (int i = 0; i < 24; i++) {
     input = input + input;
   }
-  std::string padding_garbage{"garbage"};
-  for (int i = 0; i < 10; i++) {
-    padding_garbage = padding_garbage + padding_garbage;
-  }
+  std::string const padding_garbage(10000, 'g');
   std::string const data_garbage{"GARBAGE"};
   std::string const begininput{"begin of bananarama"};
   std::string const endinput{"end of bananarama"};
@@ -241,10 +245,10 @@ TEST_F(DataChunkSourceTest, BgzipSourceVirtualOffsets)
   }
   input = begininput + input + endinput;
 
-  auto const source =
-    cudf::io::text::make_source_from_bgzip_file(filename,
-                                                begin_compressed_offset << 16 | begin_local_offset,
-                                                end_compressed_offset << 16 | end_local_offset);
+  auto const source = cudf::io::text::make_source_from_bgzip_file(
+    filename,
+    virtual_offset(begin_compressed_offset, begin_local_offset),
+    virtual_offset(end_compressed_offset, end_local_offset));
 
   test_source(input, *source);
 }
@@ -255,8 +259,6 @@ TEST_F(DataChunkSourceTest, BgzipSourceVirtualOffsetsSingleGZipBlock)
   std::string const input{"collection unit brings"};
   std::string const head_garbage{"garbage"};
   std::string const tail_garbage{"GARBAGE"};
-  std::size_t begin_compressed_offset{};
-  std::size_t end_compressed_offset{};
   std::size_t const begin_local_offset{head_garbage.size()};
   std::size_t const end_local_offset{head_garbage.size() + input.size()};
   {
@@ -266,10 +268,8 @@ TEST_F(DataChunkSourceTest, BgzipSourceVirtualOffsetsSingleGZipBlock)
     cudf::io::text::detail::bgzip::write_uncompressed_block(output_stream, {});
   }
 
-  auto const source =
-    cudf::io::text::make_source_from_bgzip_file(filename,
-                                                begin_compressed_offset << 16 | begin_local_offset,
-                                                end_compressed_offset << 16 | end_local_offset);
+  auto const source = cudf::io::text::make_source_from_bgzip_file(
+    filename, virtual_offset(0, begin_local_offset), virtual_offset(0, end_local_offset));
 
   test_source(input, *source);
 }
@@ -280,7 +280,6 @@ TEST_F(DataChunkSourceTest, BgzipSourceVirtualOffsetsSingleChunk)
   std::string const input{"collection unit brings"};
   std::string const head_garbage{"garbage"};
   std::string const tail_garbage{"GARBAGE"};
-  std::size_t begin_compressed_offset{};
   std::size_t end_compressed_offset{};
   std::size_t const begin_local_offset{head_garbage.size()};
   std::size_t const end_local_offset{input.size() - 10};
@@ -294,10 +293,10 @@ TEST_F(DataChunkSourceTest, BgzipSourceVirtualOffsetsSingleChunk)
     cudf::io::text::detail::bgzip::write_uncompressed_block(output_stream, {});
   }
 
-  auto const source =
-    cudf::io::text::make_source_from_bgzip_file(filename,
-                                                begin_compressed_offset << 16 | begin_local_offset,
-                                                end_compressed_offset << 16 | end_local_offset);
+  auto const source = cudf::io::text::make_source_from_bgzip_file(
+    filename,
+    virtual_offset(0, begin_local_offset),
+    virtual_offset(end_compressed_offset, end_local_offset));
 
   test_source(input, *source);
 }
@@ -306,13 +305,11 @@ TEST_F(DataChunkSourceTest, BgzipCompressedSourceVirtualOffsets)
 {
   auto const filename = temp_env->get_temp_filepath("bgzip_source_compressed_offsets");
   std::string input{"bananarama"};
+  input.reserve(input.size() << 25);
   for (int i = 0; i < 24; i++) {
     input = input + input;
   }
-  std::string padding_garbage{"garbage"};
-  for (int i = 0; i < 10; i++) {
-    padding_garbage = padding_garbage + padding_garbage;
-  }
+  std::string const padding_garbage(10000, 'g');
   std::string const data_garbage{"GARBAGE"};
   std::string const begininput{"begin of bananarama"};
   std::string const endinput{"end of bananarama"};
@@ -335,10 +332,31 @@ TEST_F(DataChunkSourceTest, BgzipCompressedSourceVirtualOffsets)
   }
   input = begininput + input + endinput;
 
-  auto source =
-    cudf::io::text::make_source_from_bgzip_file(filename,
-                                                begin_compressed_offset << 16 | begin_local_offset,
-                                                end_compressed_offset << 16 | end_local_offset);
+  auto source = cudf::io::text::make_source_from_bgzip_file(
+    filename,
+    virtual_offset(begin_compressed_offset, begin_local_offset),
+    virtual_offset(end_compressed_offset, end_local_offset));
+  test_source(input, *source);
+}
+
+TEST_F(DataChunkSourceTest, BgzipSourceVirtualOffsetsSingleCompressedGZipBlock)
+{
+  auto const filename = temp_env->get_temp_filepath("bgzip_source_offsets_single_compressed_block");
+  std::string const input{"collection unit brings"};
+  std::string const head_garbage(10000, 'g');  // highly compressible padding ahead of the input
+  std::string const tail_garbage{"GARBAGE"};
+  std::size_t const begin_local_offset{head_garbage.size()};
+  std::size_t const end_local_offset{head_garbage.size() + input.size()};
+  {
+    std::ofstream output_stream{filename};
+    cudf::io::text::detail::bgzip::write_compressed_block(output_stream,
+                                                          head_garbage + input + tail_garbage);
+    cudf::io::text::detail::bgzip::write_uncompressed_block(output_stream, {});  // block, no data
+  }
+
+  auto const source = cudf::io::text::make_source_from_bgzip_file(
+    filename, virtual_offset(0, begin_local_offset), virtual_offset(0, end_local_offset));
+
   test_source(input, *source);
 }
 

From 759825359b61dfbbf2d8464d8701f4eadcb253e5 Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Fri, 14 Oct 2022 13:32:10 -0400
Subject: [PATCH 037/202] Add libcudf strings examples (#11849)

Creates an example for calling libcudf APIs for strings processing.
This also includes examples of building custom kernels for modifying libcudf strings columns.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Robert Maynard (https://github.com/robertmaynard)
  - Mark Sadang (https://github.com/msadang)
  - https://github.com/nvdbaranec

URL: https://github.com/rapidsai/cudf/pull/11849
---
 ci/release/update-version.sh               |   1 +
 cpp/examples/basic/CMakeLists.txt          |   2 +-
 cpp/examples/build.sh                      |   9 +-
 cpp/examples/strings/CMakeLists.txt        |  49 ++++++
 cpp/examples/strings/common.hpp            | 114 ++++++++++++++
 cpp/examples/strings/custom_optimized.cu   | 165 +++++++++++++++++++++
 cpp/examples/strings/custom_prealloc.cu    | 126 ++++++++++++++++
 cpp/examples/strings/custom_with_malloc.cu | 158 ++++++++++++++++++++
 cpp/examples/strings/libcudf_apis.cpp      |  62 ++++++++
 cpp/examples/strings/names.csv             |  20 +++
 10 files changed, 704 insertions(+), 2 deletions(-)
 create mode 100644 cpp/examples/strings/CMakeLists.txt
 create mode 100644 cpp/examples/strings/common.hpp
 create mode 100644 cpp/examples/strings/custom_optimized.cu
 create mode 100644 cpp/examples/strings/custom_prealloc.cu
 create mode 100644 cpp/examples/strings/custom_with_malloc.cu
 create mode 100644 cpp/examples/strings/libcudf_apis.cpp
 create mode 100644 cpp/examples/strings/names.csv

diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
index 8fad4e08c56..c23f558f071 100755
--- a/ci/release/update-version.sh
+++ b/ci/release/update-version.sh
@@ -74,6 +74,7 @@ sed_runner "s/cudf=${CURRENT_SHORT_TAG}/cudf=${NEXT_SHORT_TAG}/g" README.md
 
 # Libcudf examples update
 sed_runner "s/CUDF_TAG branch-${CURRENT_SHORT_TAG}/CUDF_TAG branch-${NEXT_SHORT_TAG}/" cpp/examples/basic/CMakeLists.txt
+sed_runner "s/CUDF_TAG branch-${CURRENT_SHORT_TAG}/CUDF_TAG branch-${NEXT_SHORT_TAG}/" cpp/examples/strings/CMakeLists.txt
 
 # ucx-py version update
 sed_runner "s/export UCX_PY_VERSION=.*/export UCX_PY_VERSION='${NEXT_UCX_PY_VERSION}'/g" ci/gpu/build.sh
diff --git a/cpp/examples/basic/CMakeLists.txt b/cpp/examples/basic/CMakeLists.txt
index b182cb08774..7e7c6b191b5 100644
--- a/cpp/examples/basic/CMakeLists.txt
+++ b/cpp/examples/basic/CMakeLists.txt
@@ -1,6 +1,6 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
-cmake_minimum_required(VERSION 3.18)
+cmake_minimum_required(VERSION 3.23.1)
 
 project(
   basic_example
diff --git a/cpp/examples/build.sh b/cpp/examples/build.sh
index 079f7358872..7d389cd318d 100755
--- a/cpp/examples/build.sh
+++ b/cpp/examples/build.sh
@@ -17,8 +17,15 @@ LIB_BUILD_DIR=${LIB_BUILD_DIR:-$(readlink -f "${EXAMPLES_DIR}/../build")}
 # Basic example
 BASIC_EXAMPLE_DIR=${EXAMPLES_DIR}/basic
 BASIC_EXAMPLE_BUILD_DIR=${BASIC_EXAMPLE_DIR}/build
-
 # Configure
 cmake -S ${BASIC_EXAMPLE_DIR} -B ${BASIC_EXAMPLE_BUILD_DIR} -Dcudf_ROOT="${LIB_BUILD_DIR}"
 # Build
 cmake --build ${BASIC_EXAMPLE_BUILD_DIR} -j${PARALLEL_LEVEL}
+
+# Strings example
+STRINGS_EXAMPLE_DIR=${EXAMPLES_DIR}/strings
+STRINGS_EXAMPLE_BUILD_DIR=${STRINGS_EXAMPLE_DIR}/build
+# Configure
+cmake -S ${STRINGS_EXAMPLE_DIR} -B ${STRINGS_EXAMPLE_BUILD_DIR} -Dcudf_ROOT="${LIB_BUILD_DIR}"
+# Build
+cmake --build ${STRINGS_EXAMPLE_BUILD_DIR} -j${PARALLEL_LEVEL}
diff --git a/cpp/examples/strings/CMakeLists.txt b/cpp/examples/strings/CMakeLists.txt
new file mode 100644
index 00000000000..1a16b2bc8fd
--- /dev/null
+++ b/cpp/examples/strings/CMakeLists.txt
@@ -0,0 +1,49 @@
+# Copyright (c) 2022, NVIDIA CORPORATION.
+
+cmake_minimum_required(VERSION 3.23.1)
+
+project(
+  strings_examples
+  VERSION 0.0.1
+  LANGUAGES CXX CUDA
+)
+
+set(CPM_DOWNLOAD_VERSION v0.35.3) # CPM.cmake release whose get_cpm.cmake is downloaded below
+file(
+  DOWNLOAD
+  https://github.com/cpm-cmake/CPM.cmake/releases/download/${CPM_DOWNLOAD_VERSION}/get_cpm.cmake
+  ${CMAKE_BINARY_DIR}/cmake/get_cpm.cmake
+)
+include(${CMAKE_BINARY_DIR}/cmake/get_cpm.cmake)
+
+set(CUDF_TAG branch-22.12) # cudf git ref; kept current by ci/release/update-version.sh
+CPMFindPackage(
+  NAME cudf GIT_REPOSITORY https://github.com/rapidsai/cudf
+  GIT_TAG ${CUDF_TAG}
+  GIT_SHALLOW
+    TRUE
+    SOURCE_SUBDIR
+    cpp
+)
+
+list(APPEND CUDF_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr)
+
+# Example executables; the .cu targets add CUDF_CUDA_FLAGS when compiling CUDA sources
+add_executable(libcudf_apis libcudf_apis.cpp)
+target_compile_features(libcudf_apis PRIVATE cxx_std_17)
+target_link_libraries(libcudf_apis PRIVATE cudf::cudf nvToolsExt)
+
+add_executable(custom_with_malloc custom_with_malloc.cu)
+target_compile_features(custom_with_malloc PRIVATE cxx_std_17)
+target_compile_options(custom_with_malloc PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>")
+target_link_libraries(custom_with_malloc PRIVATE cudf::cudf nvToolsExt)
+
+add_executable(custom_prealloc custom_prealloc.cu)
+target_compile_features(custom_prealloc PRIVATE cxx_std_17)
+target_compile_options(custom_prealloc PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>")
+target_link_libraries(custom_prealloc PRIVATE cudf::cudf nvToolsExt)
+
+add_executable(custom_optimized custom_optimized.cu)
+target_compile_features(custom_optimized PRIVATE cxx_std_17)
+target_compile_options(custom_optimized PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>")
+target_link_libraries(custom_optimized PRIVATE cudf::cudf nvToolsExt)
diff --git a/cpp/examples/strings/common.hpp b/cpp/examples/strings/common.hpp
new file mode 100644
index 00000000000..dbd3c4dbd1b
--- /dev/null
+++ b/cpp/examples/strings/common.hpp
@@ -0,0 +1,114 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <cudf/column/column.hpp>
+#include <cudf/column/column_view.hpp>
+#include <cudf/io/csv.hpp>
+#include <cudf/io/datasource.hpp>
+#include <cudf/table/table.hpp>
+#include <cudf/table/table_view.hpp>
+
+#include <rmm/mr/device/cuda_memory_resource.hpp>
+#include <rmm/mr/device/device_memory_resource.hpp>
+#include <rmm/mr/device/owning_wrapper.hpp>
+#include <rmm/mr/device/pool_memory_resource.hpp>
+
+#include <chrono>
+#include <iostream>
+#include <memory>
+#include <string>
+
+/**
+ * @brief Main example function returns redacted strings column.
+ *
+ * This function returns a redacted version of the input `names` column
+ * using the `visibilities` column as in the following example
+ * ```
+ * names        visibility  --> redacted
+ * John Doe     public          D John
+ * Bobby Joe    private         X X
+ * ```
+ *
+ * @param names First and last names separated with a single space
+ * @param visibilities String values `public` or `private` only
+ * @return Redacted strings column
+ */
+std::unique_ptr<cudf::column> redact_strings(cudf::column_view const& names,
+                                             cudf::column_view const& visibilities);
+
+/**
+ * @brief Create a `rmm::mr::cuda_memory_resource` held by a `std::shared_ptr`
+ */
+auto make_cuda_mr() { return std::make_shared<rmm::mr::cuda_memory_resource>(); }
+
+/**
+ * @brief Create a pool device memory resource built from the resource returned by make_cuda_mr()
+ */
+auto make_pool_mr()
+{
+  return rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(make_cuda_mr());
+}
+
+/**
+ * @brief Create memory resource for libcudf functions: pool if name is "pool", otherwise CUDA
+ */
+std::shared_ptr<rmm::mr::device_memory_resource> create_memory_resource(std::string const& name)
+{
+  if (name == "pool") { return make_pool_mr(); }
+  return make_cuda_mr();
+}
+
+/**
+ * @brief Main for strings examples
+ *
+ * Command line parameters:
+ * 1. CSV file name/path (required; a usage message is printed and 1 is returned if missing)
+ * 2. Memory resource (optional): 'pool' or 'cuda'; defaults to 'cuda'
+ *
+ * The stdout includes the input row/column counts, the wall time, and the output size in bytes.
+ */
+int main(int argc, char const** argv)
+{
+  if (argc < 2) {
+    std::cout << "required parameter: csv-file-path\n";
+    return 1;
+  }
+
+  auto const mr_name = std::string{argc > 2 ? std::string(argv[2]) : std::string("cuda")};
+  auto resource      = create_memory_resource(mr_name);
+  rmm::mr::set_current_device_resource(resource.get());
+
+  auto const csv_file   = std::string{argv[1]};
+  auto const csv_result = [csv_file] {
+    cudf::io::csv_reader_options in_opts =
+      cudf::io::csv_reader_options::builder(cudf::io::source_info{csv_file}).header(-1);
+    return cudf::io::read_csv(in_opts).tbl;
+  }();
+  auto const csv_table = csv_result->view();
+
+  std::cout << "table: " << csv_table.num_rows() << " rows " << csv_table.num_columns()
+            << " columns\n";
+  // Time the redact_strings call only; reading the CSV above is excluded
+  auto st     = std::chrono::steady_clock::now();
+  auto result = redact_strings(csv_table.column(0), csv_table.column(1));
+
+  std::chrono::duration<double> elapsed = std::chrono::steady_clock::now() - st;
+  std::cout << "Wall time: " << elapsed.count() << " seconds\n";
+  std::cout << "Output size " << result->view().child(1).size() << " bytes\n";
+
+  return 0;
+}
diff --git a/cpp/examples/strings/custom_optimized.cu b/cpp/examples/strings/custom_optimized.cu
new file mode 100644
index 00000000000..bfe650daa93
--- /dev/null
+++ b/cpp/examples/strings/custom_optimized.cu
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common.hpp"
+
+#include <cudf/column/column_device_view.cuh>
+#include <cudf/column/column_factories.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_uvector.hpp>
+#include <rmm/exec_policy.hpp>
+
+#include <thrust/scan.h>
+
+#include <cuda_runtime.h>
+#include <nvToolsExt.h>
+
+/**
+ * @brief Computes the number of bytes each output row needs
+ *
+ * The kernel is launched with at least one thread per row in d_names.
+ *
+ * @param d_names Column of names
+ * @param d_visibilities Column of visibilities
+ * @param d_sizes Output sizes for each row
+ */
+__global__ void sizes_kernel(cudf::column_device_view const d_names,
+                             cudf::column_device_view const d_visibilities,
+                             cudf::size_type* d_sizes)
+{
+  // one row per thread: derive the row from the CUDA thread/block coordinates
+  auto const row = threadIdx.x + blockIdx.x * blockDim.x;
+  // the grid may hold more threads than there are rows
+  if (row >= d_names.size()) return;
+
+  auto const visible   = cudf::string_view("public", 6);
+  auto const redaction = cudf::string_view("X X", 3);
+
+  auto const vis = d_visibilities.element<cudf::string_view>(row);
+  if (!(vis == visible)) {
+    // rows that are not public only need room for the redaction string
+    d_sizes[row] = redaction.size_bytes();
+    return;
+  }
+
+  // public rows need the first name, one separator byte and the last initial
+  auto const name         = d_names.element<cudf::string_view>(row);
+  auto const space_idx    = name.find(' ');
+  auto const first        = name.substr(0, space_idx);
+  auto const last_initial = name.substr(space_idx + 1, 1);
+  d_sizes[row] = first.size_bytes() + last_initial.size_bytes() + 1;
+}
+
+/**
+ * @brief Builds the output for each row
+ *
+ * This kernel is launched with one thread per row in d_names.
+ *
+ * @param d_names Column of names
+ * @param d_visibilities Column of visibilities
+ * @param d_offsets Byte offset in `d_chars` for each row
+ * @param d_chars Output memory for all rows
+ */
+__global__ void redact_kernel(cudf::column_device_view const d_names,
+                              cudf::column_device_view const d_visibilities,
+                              cudf::size_type const* d_offsets,
+                              char* d_chars)
+{
+  // The row index is resolved from the CUDA thread/block objects
+  auto index = threadIdx.x + blockIdx.x * blockDim.x;
+  // There may be more threads than actual rows
+  if (index >= d_names.size()) return;
+
+  auto const visible   = cudf::string_view("public", 6);
+  auto const redaction = cudf::string_view("X X", 3);
+
+  // resolve output_ptr using the offsets vector
+  char* output_ptr = d_chars + d_offsets[index];
+
+  auto const name = d_names.element<cudf::string_view>(index);
+  auto const vis  = d_visibilities.element<cudf::string_view>(index);
+
+  if (vis == visible) {
+    auto const space_idx    = name.find(' ');
+    auto const first        = name.substr(0, space_idx);
+    auto const last_initial = name.substr(space_idx + 1, 1);
+
+    // build output string: last initial, a space, then the first name
+    memcpy(output_ptr, last_initial.data(), last_initial.size_bytes());
+    output_ptr += last_initial.size_bytes();
+    *output_ptr++ = ' ';
+    memcpy(output_ptr, first.data(), first.size_bytes());
+  } else {
+    // any other visibility is replaced by the fixed redaction string
+    memcpy(output_ptr, redaction.data(), redaction.size_bytes());
+  }
+}
+
+/**
+ * @brief Redacts each name per the corresponding visibility entry
+ *
+ * The offsets and chars children of the output strings column are computed
+ * by custom kernels directly in device memory and then handed to libcudf.
+ *
+ * @param names Column of names
+ * @param visibilities Column of visibilities
+ * @return Redacted column of names
+ */
+std::unique_ptr<cudf::column> redact_strings(cudf::column_view const& names,
+                                             cudf::column_view const& visibilities)
+{
+  // every allocation, copy and kernel launch below is issued on this stream
+  auto stream = rmm::cuda_stream_default;
+
+  auto const names_dv        = cudf::column_device_view::create(names, stream);
+  auto const visibilities_dv = cudf::column_device_view::create(visibilities, stream);
+
+  constexpr int threads_per_block = 128;  // arbitrary; should be a power of 2
+  int const grid_size             = (names.size() + threads_per_block - 1) / threads_per_block;
+
+  nvtxRangePushA("redact_strings");
+
+  // one offset per row plus a final entry that will hold the total size
+  auto offsets = rmm::device_uvector<cudf::size_type>(names.size() + 1, stream);
+
+  // first pass: the per-row output sizes are written into the offsets vector
+  sizes_kernel<<<grid_size, threads_per_block, 0, stream.value()>>>(
+    *names_dv, *visibilities_dv, offsets.data());
+
+  // an in-place exclusive scan turns the sizes into offsets
+  thrust::exclusive_scan(rmm::exec_policy(stream), offsets.begin(), offsets.end(), offsets.begin());
+
+  // the final offset equals the total number of output bytes
+  // (reading it copies 1 integer device-to-host -- includes synching the stream)
+  auto const total_bytes = offsets.back_element(stream);
+
+  // allocate the chars child
+  auto chars = rmm::device_uvector<char>(total_bytes, stream);
+
+  // second pass: write each row's string at its offset
+  redact_kernel<<<grid_size, threads_per_block, 0, stream.value()>>>(
+    *names_dv, *visibilities_dv, offsets.data(), chars.data());
+
+  // the vectors are moved into the new strings column (no copy is performed)
+  auto result = cudf::make_strings_column(names.size(), std::move(offsets), std::move(chars));
+
+  // wait for all of the above to finish
+  stream.synchronize();
+
+  nvtxRangePop();
+  return result;
+}
diff --git a/cpp/examples/strings/custom_prealloc.cu b/cpp/examples/strings/custom_prealloc.cu
new file mode 100644
index 00000000000..c0bae03af5c
--- /dev/null
+++ b/cpp/examples/strings/custom_prealloc.cu
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common.hpp"
+
+#include <cudf/column/column_device_view.cuh>
+#include <cudf/column/column_factories.hpp>
+#include <cudf/scalar/scalar.hpp>
+
+#include <rmm/device_uvector.hpp>
+
+#include <cuda_runtime.h>
+#include <nvToolsExt.h>
+
+/**
+ * @brief Builds the output for each row
+ *
+ * The kernel is launched with at least one thread per row in d_names.
+ *
+ * @param d_names Column of names
+ * @param d_visibilities Column of visibilities
+ * @param redaction Redacted string replacement
+ * @param working_memory Output memory for all rows
+ * @param d_offsets Byte offset in `working_memory` for each row
+ * @param d_output Output array of string_view objects
+ */
+__global__ void redact_kernel(cudf::column_device_view const d_names,
+                              cudf::column_device_view const d_visibilities,
+                              cudf::string_view redaction,
+                              char* working_memory,
+                              cudf::offset_type const* d_offsets,
+                              cudf::string_view* d_output)
+{
+  // derive this thread's row from the CUDA thread/block coordinates
+  auto const row = threadIdx.x + blockIdx.x * blockDim.x;
+  // the grid may hold more threads than there are rows
+  if (row >= d_names.size()) return;
+
+  auto const visible = cudf::string_view("public", 6);
+  auto const vis     = d_visibilities.element<cudf::string_view>(row);
+  if (!(vis == visible)) {
+    d_output[row] = cudf::string_view{redaction.data(), redaction.size_bytes()};
+    return;
+  }
+
+  auto const name         = d_names.element<cudf::string_view>(row);
+  auto const space_idx    = name.find(' ');
+  auto const first        = name.substr(0, space_idx);
+  auto const last_initial = name.substr(space_idx + 1, 1);
+  auto const output_size  = first.size_bytes() + last_initial.size_bytes() + 1;
+
+  char* output_ptr = working_memory + d_offsets[row];
+  d_output[row]    = cudf::string_view{output_ptr, output_size};
+
+  // write the last initial, a space, then the first name
+  memcpy(output_ptr, last_initial.data(), last_initial.size_bytes());
+  output_ptr += last_initial.size_bytes();
+  *output_ptr++ = ' ';
+  memcpy(output_ptr, first.data(), first.size_bytes());
+}
+
+/**
+ * @brief Redacts each name per the corresponding visibility entry
+ *
+ * Each output string is written into a preallocated working buffer and a
+ * factory function then gathers the strings into a strings column.
+ *
+ * @param names Column of names
+ * @param visibilities Column of visibilities
+ * @return Redacted column of names
+ */
+std::unique_ptr<cudf::column> redact_strings(cudf::column_view const& names,
+                                             cudf::column_view const& visibilities)
+{
+  // every allocation and kernel launch below is issued on this stream
+  auto stream = rmm::cuda_stream_default;
+
+  auto const names_dv        = cudf::column_device_view::create(names, stream);
+  auto const visibilities_dv = cudf::column_device_view::create(visibilities, stream);
+  auto const redaction       = cudf::string_scalar(std::string("X X"), true, stream);
+
+  constexpr int threads_per_block = 128;  // arbitrary; should be a power of 2
+  auto const grid_size            = (names.size() + threads_per_block - 1) / threads_per_block;
+
+  nvtxRangePushA("redact_strings");
+
+  auto const names_scv     = cudf::strings_column_view(names);
+  auto const input_offsets = names_scv.offsets_begin();
+
+  // working buffer sized like the input chars; rows are placed at the input offsets
+  auto working_memory = rmm::device_uvector<char>(names_scv.chars_size(), stream);
+  // one string_view per row describing where its output string lives
+  auto row_views = rmm::device_uvector<cudf::string_view>(names.size(), stream);
+
+  // fill the working buffer and the string_view vector
+  redact_kernel<<<grid_size, threads_per_block, 0, stream.value()>>>(*names_dv,
+                                                                     *visibilities_dv,
+                                                                     redaction.value(),
+                                                                     working_memory.data(),
+                                                                     input_offsets,
+                                                                     row_views.data());
+
+  // create the strings column from the string_view objects;
+  // this copies all the individual strings into a single output column
+  auto result = cudf::make_strings_column(row_views, cudf::string_view{nullptr, 0}, stream);
+  // row_views and working_memory are temporary; they are released when this function returns
+
+  // wait for all of the above to finish
+  stream.synchronize();
+
+  nvtxRangePop();
+  return result;
+}
diff --git a/cpp/examples/strings/custom_with_malloc.cu b/cpp/examples/strings/custom_with_malloc.cu
new file mode 100644
index 00000000000..f1d397ef007
--- /dev/null
+++ b/cpp/examples/strings/custom_with_malloc.cu
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common.hpp"
+
+#include <cudf/column/column_device_view.cuh>
+#include <cudf/column/column_factories.hpp>
+#include <cudf/scalar/scalar.hpp>
+
+#include <rmm/device_uvector.hpp>
+
+#include <cuda_runtime.h>
+#include <nvToolsExt.h>
+
+/**
+ * @brief Reserve CUDA malloc heap size
+ *
+ * Raises the CUDA malloc heap size limit to at least `heap_size`; a larger existing
+ * limit is kept. The value depends on the total size of all the malloc() calls
+ * needed for redact_kernel.
+ *
+ * @param heap_size Number of bytes to reserve (default is 1GB)
+ * @throws std::runtime_error if the limit cannot be set
+ */
+void set_malloc_heap_size(size_t heap_size = 1073741824)  // 1GB
+{
+  // query the current limit so that it is only ever raised, never lowered
+  size_t max_malloc_heap_size = 0;
+  cudaDeviceGetLimit(&max_malloc_heap_size, cudaLimitMallocHeapSize);
+  if (max_malloc_heap_size >= heap_size) { return; }
+  if (cudaDeviceSetLimit(cudaLimitMallocHeapSize, heap_size) != cudaSuccess) {
+    // heap_size is a size_t so %zu is the matching conversion specifier
+    fprintf(stderr, "could not set malloc heap size to %zuMB\n", (heap_size / (1024 * 1024)));
+    throw std::runtime_error("could not set the CUDA malloc heap size");
+  }
+}
+
+/**
+ * @brief Builds the output for each row
+ *
+ * The kernel is launched with at least one thread per row in d_names.
+ *
+ * Note: Each visible row's output string is allocated with malloc() inside
+ * the kernel, which works but is not very efficient. This can be useful for
+ * prototyping functions where performance is not yet important.
+ * All calls to malloc() must have a corresponding free() call.
+ * The separate free_kernel is launched for this purpose.
+ *
+ * @param d_names Column of names
+ * @param d_visibilities Column of visibilities
+ * @param redaction Redacted string replacement
+ * @param d_output Output array of string_view objects
+ */
+__global__ void redact_kernel(cudf::column_device_view const d_names,
+                              cudf::column_device_view const d_visibilities,
+                              cudf::string_view redaction,
+                              cudf::string_view* d_output)
+{
+  // derive this thread's row from the CUDA thread/block coordinates
+  auto const row = threadIdx.x + blockIdx.x * blockDim.x;
+  // the grid may hold more threads than there are rows
+  if (row >= d_names.size()) return;
+
+  auto const visible = cudf::string_view("public", 6);
+  auto const vis     = d_visibilities.element<cudf::string_view>(row);
+  if (!(vis == visible)) {
+    d_output[row] = cudf::string_view{redaction.data(), redaction.size_bytes()};
+    return;
+  }
+
+  auto const name         = d_names.element<cudf::string_view>(row);
+  auto const space_idx    = name.find(' ');
+  auto const first        = name.substr(0, space_idx);
+  auto const last_initial = name.substr(space_idx + 1, 1);
+  auto const output_size  = first.size_bytes() + last_initial.size_bytes() + 1;
+
+  char* output_ptr = static_cast<char*>(malloc(output_size));
+  d_output[row]    = cudf::string_view{output_ptr, output_size};
+
+  // build output string (NOTE(review): a nullptr result from malloc() is not checked here)
+  memcpy(output_ptr, last_initial.data(), last_initial.size_bytes());
+  output_ptr += last_initial.size_bytes();
+  *output_ptr++ = ' ';
+  memcpy(output_ptr, first.data(), first.size_bytes());
+}
+
+/**
+ * @brief Frees the temporary individual string objects created in the redact_kernel
+ *
+ * Like malloc(), free() is not very efficient but must be called for
+ * each malloc() to return the memory to the CUDA malloc heap.
+ *
+ * @param redaction Redacted string replacement (not to be freed)
+ * @param d_output Output array of string_view objects to free
+ * @param count Number of elements of d_output to process
+ */
+__global__ void free_kernel(cudf::string_view redaction, cudf::string_view* d_output, int count)
+{
+  auto const row = threadIdx.x + blockIdx.x * blockDim.x;
+  if (row >= count) return;
+
+  char const* str_data = d_output[row].data();
+  if (str_data != redaction.data()) { free(const_cast<char*>(str_data)); }
+}
+
+/**
+ * @brief Redacts each name per the corresponding visibility entry using device malloc()
+ *
+ * @param names Column of names
+ * @param visibilities Column of visibilities
+ * @return Redacted column of names
+ */
+std::unique_ptr<cudf::column> redact_strings(cudf::column_view const& names,
+                                             cudf::column_view const& visibilities)
+{
+  // all device memory operations and kernel functions will run on this stream
+  auto stream = rmm::cuda_stream_default;
+  set_malloc_heap_size();  // to illustrate adjusting the malloc heap
+
+  auto const d_names        = cudf::column_device_view::create(names, stream);
+  auto const d_visibilities = cudf::column_device_view::create(visibilities, stream);
+  auto const d_redaction    = cudf::string_scalar(std::string("X X"), true, stream);
+
+  constexpr int block_size = 128;  // this arbitrary size should be a power of 2
+  auto const blocks        = (names.size() + block_size - 1) / block_size;
+
+  nvtxRangePushA("redact_strings");
+
+  // output string_views held by value so the vector is released on every path (also on throw)
+  auto str_ptrs = rmm::device_uvector<cudf::string_view>(names.size(), stream);
+  // build the output strings
+  redact_kernel<<<blocks, block_size, 0, stream.value()>>>(
+    *d_names, *d_visibilities, d_redaction.value(), str_ptrs.data());
+  // this copies all the individual strings into a single output column
+  auto result = cudf::make_strings_column(str_ptrs, cudf::string_view{nullptr, 0}, stream);
+  // free the individual temporary memory pointers
+  free_kernel<<<blocks, block_size, 0, stream.value()>>>(
+    d_redaction.value(), str_ptrs.data(), names.size());
+
+  // wait for all of the above to finish
+  stream.synchronize();
+
+  nvtxRangePop();
+  return result;
+}
diff --git a/cpp/examples/strings/libcudf_apis.cpp b/cpp/examples/strings/libcudf_apis.cpp
new file mode 100644
index 00000000000..009e92d8a0d
--- /dev/null
+++ b/cpp/examples/strings/libcudf_apis.cpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common.hpp"
+
+#include <cudf/copying.hpp>
+#include <cudf/scalar/scalar.hpp>
+#include <cudf/strings/combine.hpp>
+#include <cudf/strings/find.hpp>
+#include <cudf/strings/split/split.hpp>
+#include <cudf/strings/strings_column_view.hpp>
+#include <cudf/strings/substring.hpp>
+
+#include <cuda_runtime.h>
+#include <nvToolsExt.h>
+
+/**
+ * @brief Redacts each name per the corresponding visibility entry
+ *
+ * Only libcudf APIs are used here to produce the output result.
+ *
+ * @param names Column of names
+ * @param visibilities Column of visibilities
+ * @return Redacted column of names
+ */
+std::unique_ptr<cudf::column> redact_strings(cudf::column_view const& names,
+                                             cudf::column_view const& visibilities)
+{
+  auto const public_tag  = cudf::string_scalar(std::string("public"));
+  auto const replacement = cudf::string_scalar(std::string("X X"));
+
+  nvtxRangePushA("redact_strings");
+
+  // rows whose visibility contains "public" keep their name; all others become "X X"
+  auto const is_public = cudf::strings::contains(visibilities, public_tag);
+  auto const filtered  = cudf::copy_if_else(names, replacement, is_public->view());
+  // the replacement is split like a name, so redacted rows pass through the same steps
+  auto const parts        = cudf::strings::split(filtered->view());
+  auto const first        = parts->view().column(0);
+  auto const last         = parts->view().column(1);
+  auto const last_initial = cudf::strings::slice_strings(last, 0, 1);
+  auto const reordered    = cudf::table_view({last_initial->view(), first});
+  auto result             = cudf::strings::concatenate(reordered, std::string(" "));
+
+  cudaStreamSynchronize(0);
+
+  nvtxRangePop();
+  return result;
+}
diff --git a/cpp/examples/strings/names.csv b/cpp/examples/strings/names.csv
new file mode 100644
index 00000000000..77dca3e02af
--- /dev/null
+++ b/cpp/examples/strings/names.csv
@@ -0,0 +1,20 @@
+John Doe,public
+Jane Doe,private
+Billy Joe,private
+James James,public
+Michael Frederick,public
+Christopher Cheryl,public
+Jessica Autumn,public
+Matthew Tyrone,public
+Ashley Martha,public
+Jennifer Omar,public
+Joshua Lydia,public
+Amanda Jerome,public
+Daniel Theodore,public
+David Abby,public
+James Neil,public
+Robert Shawna,private
+John Sierra,private
+Joseph Nina,private
+Andrew Tammy,private
+Ryan Nikki,public

From c265c58502e629814a036488d153516724afdebd Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Fri, 14 Oct 2022 17:22:29 -0400
Subject: [PATCH 038/202] Fix cudf::stable_sorted_order for NaN and -NaN in
 FLOAT64 columns (#11874)

Fixes a bug in `cudf::stable_sorted_order` when `-NaN` and `NaN` are in a FLOAT64 (double) column.
The code was fixed by refactoring common code with `cudf::sorted_order`. This uses thrust sort functions to help align the behavior and keep results consistent.

New gtests were added to check for this case. Some test files were also updated per issue #11734
The new tests are at the bottom of `sort_test.cpp` and `stable_sort_tests.cpp`

This was found while working on #11729
The sorted-order functions are reused for many of the libcudf sort functions so this will help with the work in #11729

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/11874
---
 cpp/src/sort/sort_column.cu          | 104 +-------
 cpp/src/sort/sort_column_impl.cuh    | 152 +++++++++++
 cpp/src/sort/stable_sort_column.cu   |  72 +----
 cpp/tests/sort/sort_test.cpp         | 386 ++++++++++++++-------------
 cpp/tests/sort/stable_sort_tests.cpp | 162 ++++++-----
 5 files changed, 459 insertions(+), 417 deletions(-)
 create mode 100644 cpp/src/sort/sort_column_impl.cuh

diff --git a/cpp/src/sort/sort_column.cu b/cpp/src/sort/sort_column.cu
index 01ca36874e4..cf8b72f85ad 100644
--- a/cpp/src/sort/sort_column.cu
+++ b/cpp/src/sort/sort_column.cu
@@ -14,108 +14,16 @@
  * limitations under the License.
  */
 
-#include <sort/sort_impl.cuh>
+#include <sort/sort_column_impl.cuh>
+
+#include <cudf/column/column_factories.hpp>
+#include <cudf/column/column_view.hpp>
+#include <cudf/utilities/type_dispatcher.hpp>
 
-#include <thrust/functional.h>
 #include <thrust/sequence.h>
-#include <thrust/sort.h>
 
 namespace cudf {
 namespace detail {
-namespace {
-
-/**
- * @brief Type-dispatched functor for sorting a single column.
- */
-struct column_sorted_order_fn {
-  /**
-   * @brief Compile time check for allowing radix sort for column type.
-   *
-   * Floating point is removed here for special handling of NaNs.
-   */
-  template <typename T>
-  static constexpr bool is_radix_sort_supported()
-  {
-    return cudf::is_fixed_width<T>() && !cudf::is_floating_point<T>();
-  }
-
-  /**
-   * @brief Sorts fixed-width columns using faster thrust sort.
-   *
-   * @param input Column to sort
-   * @param indices Output sorted indices
-   * @param ascending True if sort order is ascending
-   * @param stream CUDA stream used for device memory operations and kernel launches
-   */
-  template <typename T, std::enable_if_t<is_radix_sort_supported<T>()>* = nullptr>
-  void radix_sort(column_view const& input,
-                  mutable_column_view& indices,
-                  bool ascending,
-                  rmm::cuda_stream_view stream)
-  {
-    // A non-stable sort on a column of arithmetic type with no nulls will use a radix sort
-    // if specifying only the `thrust::less` or `thrust::greater` comparators.
-    // But this also requires making a copy of the input data.
-    auto temp_col = column(input, stream);
-    auto d_col    = temp_col.mutable_view();
-    if (ascending) {
-      thrust::sort_by_key(rmm::exec_policy(stream),
-                          d_col.begin<T>(),
-                          d_col.end<T>(),
-                          indices.begin<size_type>(),
-                          thrust::less<T>());
-    } else {
-      thrust::sort_by_key(rmm::exec_policy(stream),
-                          d_col.begin<T>(),
-                          d_col.end<T>(),
-                          indices.begin<size_type>(),
-                          thrust::greater<T>());
-    }
-  }
-  template <typename T, std::enable_if_t<!is_radix_sort_supported<T>()>* = nullptr>
-  void radix_sort(column_view const&, mutable_column_view&, bool, rmm::cuda_stream_view)
-  {
-    CUDF_FAIL("Only fixed-width types are suitable for faster sorting");
-  }
-
-  /**
-   * @brief Sorts a single column with a relationally comparable type.
-   *
-   * This includes numeric, timestamp, duration, and string types.
-   *
-   * @param input Column to sort
-   * @param indices Output sorted indices
-   * @param ascending True if sort order is ascending
-   * @param null_precedence How null rows are to be ordered
-   * @param stream CUDA stream used for device memory operations and kernel launches
-   */
-  template <typename T, std::enable_if_t<cudf::is_relationally_comparable<T, T>()>* = nullptr>
-  void operator()(column_view const& input,
-                  mutable_column_view& indices,
-                  bool ascending,
-                  null_order null_precedence,
-                  rmm::cuda_stream_view stream)
-  {
-    // column with nulls or non-supported types will also use a comparator
-    if (input.has_nulls() || !is_radix_sort_supported<T>()) {
-      auto keys = column_device_view::create(input, stream);
-      thrust::sort(rmm::exec_policy(stream),
-                   indices.begin<size_type>(),
-                   indices.end<size_type>(),
-                   simple_comparator<T>{*keys, input.has_nulls(), ascending, null_precedence});
-    } else {
-      radix_sort<T>(input, indices, ascending, stream);
-    }
-  }
-
-  template <typename T, std::enable_if_t<!cudf::is_relationally_comparable<T, T>()>* = nullptr>
-  void operator()(column_view const&, mutable_column_view&, bool, null_order, rmm::cuda_stream_view)
-  {
-    CUDF_FAIL("Column type must be relationally comparable");
-  }
-};
-
-}  // namespace
 
 /**
  * @copydoc
@@ -134,7 +42,7 @@ std::unique_ptr<column> sorted_order<false>(column_view const& input,
   thrust::sequence(
     rmm::exec_policy(stream), indices_view.begin<size_type>(), indices_view.end<size_type>(), 0);
   cudf::type_dispatcher<dispatch_storage_type>(input.type(),
-                                               column_sorted_order_fn{},
+                                               column_sorted_order_fn<false>{},
                                                input,
                                                indices_view,
                                                column_order == order::ASCENDING,
diff --git a/cpp/src/sort/sort_column_impl.cuh b/cpp/src/sort/sort_column_impl.cuh
new file mode 100644
index 00000000000..acafe4b5a5c
--- /dev/null
+++ b/cpp/src/sort/sort_column_impl.cuh
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <sort/sort_impl.cuh>
+
+#include <thrust/sequence.h>
+#include <thrust/sort.h>
+
+namespace cudf {
+namespace detail {
+
+template <bool stable>
+struct column_sorted_order_fn {  // functor for type_dispatcher; `stable` picks stable thrust sorts
+  /**
+   * @brief Compile time check for allowing faster sort.
+   *
+   * Faster sort is defined for fixed-width types where only
+   * the primitive comparators thrust::greater or thrust::less
+   * are needed.
+   *
+   * Floating point is removed here for special handling of NaNs
+   * which require the row-comparator.
+   */
+  template <typename T>
+  static constexpr bool is_faster_sort_supported()
+  {
+    return cudf::is_fixed_width<T>() && !cudf::is_floating_point<T>();
+  }
+
+  /**
+   * @brief Sorts fixed-width columns using faster thrust sort.
+   *
+   * Should not be called if `input.has_nulls()==true`
+   *
+   * @param input Column to sort
+   * @param indices Output sorted indices
+   * @param ascending True if sort order is ascending
+   * @param stream CUDA stream used for device memory operations and kernel launches
+   */
+  template <typename T>
+  void faster_sort(column_view const& input,
+                   mutable_column_view& indices,
+                   bool ascending,
+                   rmm::cuda_stream_view stream)
+  {
+    // A thrust sort on a column of primitive types will use a radix sort.
+    // For other fixed-width types, thrust will use merge-sort.
+    // The keys are reordered in place by the sort, so it operates on a copy of the input data.
+    auto temp_col = column(input, stream);
+    auto d_col    = temp_col.mutable_view();
+    if (ascending) {
+      if constexpr (stable) {
+        thrust::stable_sort_by_key(rmm::exec_policy(stream),
+                                   d_col.begin<T>(),
+                                   d_col.end<T>(),
+                                   indices.begin<size_type>(),
+                                   thrust::less<T>());
+      } else {
+        thrust::sort_by_key(rmm::exec_policy(stream),
+                            d_col.begin<T>(),
+                            d_col.end<T>(),
+                            indices.begin<size_type>(),
+                            thrust::less<T>());
+      }
+    } else {
+      if constexpr (stable) {
+        thrust::stable_sort_by_key(rmm::exec_policy(stream),
+                                   d_col.begin<T>(),
+                                   d_col.end<T>(),
+                                   indices.begin<size_type>(),
+                                   thrust::greater<T>());
+      } else {
+        thrust::sort_by_key(rmm::exec_policy(stream),
+                            d_col.begin<T>(),
+                            d_col.end<T>(),
+                            indices.begin<size_type>(),
+                            thrust::greater<T>());
+      }
+    }
+  }
+
+  /**
+   * @brief Sorts a single column with a relationally comparable type.
+   *
+   * Used when a comparator is required (type without faster sort, or column with nulls).
+   *
+   * @param input Column to sort
+   * @param indices Output sorted indices
+   * @param ascending True if sort order is ascending
+   * @param null_precedence How null rows are to be ordered
+   * @param stream CUDA stream used for device memory operations and kernel launches
+   */
+  template <typename T>
+  void sorted_order(column_view const& input,
+                    mutable_column_view& indices,
+                    bool ascending,
+                    null_order null_precedence,
+                    rmm::cuda_stream_view stream)
+  {
+    auto keys = column_device_view::create(input, stream);
+    auto comp = simple_comparator<T>{*keys, input.has_nulls(), ascending, null_precedence};
+    if constexpr (stable) {
+      thrust::stable_sort(
+        rmm::exec_policy(stream), indices.begin<size_type>(), indices.end<size_type>(), comp);
+    } else {
+      thrust::sort(
+        rmm::exec_policy(stream), indices.begin<size_type>(), indices.end<size_type>(), comp);
+    }
+  }
+
+  template <typename T, CUDF_ENABLE_IF(cudf::is_relationally_comparable<T, T>())>
+  void operator()(column_view const& input,
+                  mutable_column_view& indices,
+                  bool ascending,
+                  null_order null_precedence,
+                  rmm::cuda_stream_view stream)
+  {
+    if constexpr (is_faster_sort_supported<T>()) {
+      if (input.has_nulls()) {  // nulls are ordered by the comparator, not by faster_sort
+        sorted_order<T>(input, indices, ascending, null_precedence, stream);
+      } else {
+        faster_sort<T>(input, indices, ascending, stream);
+      }
+    } else {
+      sorted_order<T>(input, indices, ascending, null_precedence, stream);
+    }
+  }
+
+  template <typename T, CUDF_ENABLE_IF(not cudf::is_relationally_comparable<T, T>())>
+  void operator()(column_view const&, mutable_column_view&, bool, null_order, rmm::cuda_stream_view)
+  {
+    CUDF_FAIL("Column type must be relationally comparable");
+  }
+};
+
+}  // namespace detail
+}  // namespace cudf
diff --git a/cpp/src/sort/stable_sort_column.cu b/cpp/src/sort/stable_sort_column.cu
index 7f8ab778f53..d11ddef1965 100644
--- a/cpp/src/sort/stable_sort_column.cu
+++ b/cpp/src/sort/stable_sort_column.cu
@@ -14,76 +14,16 @@
  * limitations under the License.
  */
 
-#include <sort/sort_impl.cuh>
+#include <sort/sort_column_impl.cuh>
+
+#include <cudf/column/column_factories.hpp>
+#include <cudf/column/column_view.hpp>
+#include <cudf/utilities/type_dispatcher.hpp>
 
 #include <thrust/sequence.h>
-#include <thrust/sort.h>
 
 namespace cudf {
 namespace detail {
-namespace {
-
-struct column_stable_sorted_order_fn {
-  /**
-   * @brief Stable sort of fixed-width columns using a thrust sort with no comparator.
-   *
-   * @param input Column to sort
-   * @param indices Output sorted indices
-   * @param stream CUDA stream used for device memory operations and kernel launches
-   */
-  template <typename T, std::enable_if_t<cudf::is_fixed_width<T>()>* = nullptr>
-  void faster_stable_sort(column_view const& input,
-                          mutable_column_view& indices,
-                          rmm::cuda_stream_view stream)
-  {
-    auto temp_col = column(input, stream);
-    auto d_col    = temp_col.mutable_view();
-    thrust::stable_sort_by_key(
-      rmm::exec_policy(stream), d_col.begin<T>(), d_col.end<T>(), indices.begin<size_type>());
-  }
-  template <typename T, std::enable_if_t<!cudf::is_fixed_width<T>()>* = nullptr>
-  void faster_stable_sort(column_view const&, mutable_column_view&, rmm::cuda_stream_view)
-  {
-    CUDF_FAIL("Only fixed-width types are suitable for faster stable sorting");
-  }
-
-  /**
-   * @brief Stable sorts a single column with a relationally comparable type.
-   *
-   * This includes numeric, timestamp, duration, and string types.
-   *
-   * @param input Column to sort
-   * @param indices Output sorted indices
-   * @param ascending True if sort order is ascending
-   * @param null_precedence How null rows are to be ordered
-   * @param stream CUDA stream used for device memory operations and kernel launches
-   */
-  template <typename T, std::enable_if_t<cudf::is_relationally_comparable<T, T>()>* = nullptr>
-  void operator()(column_view const& input,
-                  mutable_column_view& indices,
-                  bool ascending,
-                  null_order null_precedence,
-                  rmm::cuda_stream_view stream)
-  {
-    if (!ascending || input.has_nulls() || !cudf::is_fixed_width<T>()) {
-      auto keys = column_device_view::create(input, stream);
-      thrust::stable_sort(
-        rmm::exec_policy(stream),
-        indices.begin<size_type>(),
-        indices.end<size_type>(),
-        simple_comparator<T>{*keys, input.has_nulls(), ascending, null_precedence});
-    } else {
-      faster_stable_sort<T>(input, indices, stream);
-    }
-  }
-  template <typename T, std::enable_if_t<!cudf::is_relationally_comparable<T, T>()>* = nullptr>
-  void operator()(column_view const&, mutable_column_view&, bool, null_order, rmm::cuda_stream_view)
-  {
-    CUDF_FAIL("Column type must be relationally comparable");
-  }
-};
-
-}  // namespace
 
 /**
  * @copydoc
@@ -102,7 +42,7 @@ std::unique_ptr<column> sorted_order<true>(column_view const& input,
   thrust::sequence(
     rmm::exec_policy(stream), indices_view.begin<size_type>(), indices_view.end<size_type>(), 0);
   cudf::type_dispatcher<dispatch_storage_type>(input.type(),
-                                               column_stable_sorted_order_fn{},
+                                               column_sorted_order_fn<true>{},
                                                input,
                                                indices_view,
                                                column_order == order::ASCENDING,
diff --git a/cpp/tests/sort/sort_test.cpp b/cpp/tests/sort/sort_test.cpp
index 4092597d8e3..82af21cd7af 100644
--- a/cpp/tests/sort/sort_test.cpp
+++ b/cpp/tests/sort/sort_test.cpp
@@ -32,22 +32,20 @@
 #include <type_traits>
 #include <vector>
 
-namespace cudf {
-namespace test {
-void run_sort_test(table_view input,
-                   column_view expected_sorted_indices,
-                   std::vector<order> column_order         = {},
-                   std::vector<null_order> null_precedence = {})
+void run_sort_test(cudf::table_view input,
+                   cudf::column_view expected_sorted_indices,
+                   std::vector<cudf::order> column_order         = {},
+                   std::vector<cudf::null_order> null_precedence = {})
 {
   // Sorted table
-  auto got_sorted_table      = sort(input, column_order, null_precedence);
-  auto expected_sorted_table = gather(input, expected_sorted_indices);
+  auto got_sorted_table      = cudf::sort(input, column_order, null_precedence);
+  auto expected_sorted_table = cudf::gather(input, expected_sorted_indices);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected_sorted_table->view(), got_sorted_table->view());
 
   // Sorted by key
-  auto got_sort_by_key_table      = sort_by_key(input, input, column_order, null_precedence);
-  auto expected_sort_by_key_table = gather(input, expected_sorted_indices);
+  auto got_sort_by_key_table      = cudf::sort_by_key(input, input, column_order, null_precedence);
+  auto expected_sort_by_key_table = cudf::gather(input, expected_sorted_indices);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected_sort_by_key_table->view(), got_sort_by_key_table->view());
 }
@@ -56,7 +54,7 @@ using TestTypes = cudf::test::Concat<cudf::test::NumericTypes,  // include integ
                                      cudf::test::ChronoTypes>;  // include timestamps and durations
 
 template <typename T>
-struct Sort : public BaseFixture {
+struct Sort : public cudf::test::BaseFixture {
 };
 
 TYPED_TEST_SUITE(Sort, TestTypes);
@@ -65,17 +63,19 @@ TYPED_TEST(Sort, WithNullMax)
 {
   using T = TypeParam;
 
-  fixed_width_column_wrapper<T> col1{{5, 4, 3, 5, 8, 5}, {1, 1, 0, 1, 1, 1}};
-  strings_column_wrapper col2({"d", "e", "a", "d", "k", "d"}, {1, 1, 0, 1, 1, 1});
-  fixed_width_column_wrapper<T> col3{{10, 40, 70, 5, 2, 10}, {1, 1, 0, 1, 1, 1}};
-  table_view input{{col1, col2, col3}};
+  cudf::test::fixed_width_column_wrapper<T> col1{{5, 4, 3, 5, 8, 5}, {1, 1, 0, 1, 1, 1}};
+  cudf::test::strings_column_wrapper col2({"d", "e", "a", "d", "k", "d"}, {1, 1, 0, 1, 1, 1});
+  cudf::test::fixed_width_column_wrapper<T> col3{{10, 40, 70, 5, 2, 10}, {1, 1, 0, 1, 1, 1}};
+  cudf::table_view input{{col1, col2, col3}};
 
-  fixed_width_column_wrapper<int32_t> expected{{1, 0, 5, 3, 4, 2}};
-  std::vector<order> column_order{order::ASCENDING, order::ASCENDING, order::DESCENDING};
-  std::vector<null_order> null_precedence{null_order::AFTER, null_order::AFTER, null_order::AFTER};
+  cudf::test::fixed_width_column_wrapper<int32_t> expected{{1, 0, 5, 3, 4, 2}};
+  std::vector<cudf::order> column_order{
+    cudf::order::ASCENDING, cudf::order::ASCENDING, cudf::order::DESCENDING};
+  std::vector<cudf::null_order> null_precedence{
+    cudf::null_order::AFTER, cudf::null_order::AFTER, cudf::null_order::AFTER};
 
   // Sorted order
-  auto got = sorted_order(input, column_order, null_precedence);
+  auto got = cudf::sorted_order(input, column_order, null_precedence);
 
   if (!std::is_same_v<T, bool>) {
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view());
@@ -85,7 +85,7 @@ TYPED_TEST(Sort, WithNullMax)
   } else {
     // for bools only validate that the null element landed at the back, since
     // the rest of the values are equivalent and yields random sorted order.
-    auto to_host = [](column_view const& col) {
+    auto to_host = [](cudf::column_view const& col) {
       thrust::host_vector<int32_t> h_data(col.size());
       CUDF_CUDA_TRY(cudaMemcpy(
         h_data.data(), col.data<int32_t>(), h_data.size() * sizeof(int32_t), cudaMemcpyDefault));
@@ -96,7 +96,7 @@ TYPED_TEST(Sort, WithNullMax)
     EXPECT_EQ(h_exp[h_exp.size() - 1], h_got[h_got.size() - 1]);
 
     // Run test for sort and sort_by_key
-    fixed_width_column_wrapper<int32_t> expected_for_bool{{0, 3, 5, 1, 4, 2}};
+    cudf::test::fixed_width_column_wrapper<int32_t> expected_for_bool{{0, 3, 5, 1, 4, 2}};
     run_sort_test(input, expected_for_bool, column_order, null_precedence);
   }
 }
@@ -105,15 +105,16 @@ TYPED_TEST(Sort, WithNullMin)
 {
   using T = TypeParam;
 
-  fixed_width_column_wrapper<T> col1{{5, 4, 3, 5, 8}, {1, 1, 0, 1, 1}};
-  strings_column_wrapper col2({"d", "e", "a", "d", "k"}, {1, 1, 0, 1, 1});
-  fixed_width_column_wrapper<T> col3{{10, 40, 70, 5, 2}, {1, 1, 0, 1, 1}};
-  table_view input{{col1, col2, col3}};
+  cudf::test::fixed_width_column_wrapper<T> col1{{5, 4, 3, 5, 8}, {1, 1, 0, 1, 1}};
+  cudf::test::strings_column_wrapper col2({"d", "e", "a", "d", "k"}, {1, 1, 0, 1, 1});
+  cudf::test::fixed_width_column_wrapper<T> col3{{10, 40, 70, 5, 2}, {1, 1, 0, 1, 1}};
+  cudf::table_view input{{col1, col2, col3}};
 
-  fixed_width_column_wrapper<int32_t> expected{{2, 1, 0, 3, 4}};
-  std::vector<order> column_order{order::ASCENDING, order::ASCENDING, order::DESCENDING};
+  cudf::test::fixed_width_column_wrapper<int32_t> expected{{2, 1, 0, 3, 4}};
+  std::vector<cudf::order> column_order{
+    cudf::order::ASCENDING, cudf::order::ASCENDING, cudf::order::DESCENDING};
 
-  auto got = sorted_order(input, column_order);
+  auto got = cudf::sorted_order(input, column_order);
 
   if (!std::is_same_v<T, bool>) {
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view());
@@ -123,7 +124,7 @@ TYPED_TEST(Sort, WithNullMin)
   } else {
     // for bools only validate that the null element landed at the front, since
     // the rest of the values are equivalent and yields random sorted order.
-    auto to_host = [](column_view const& col) {
+    auto to_host = [](cudf::column_view const& col) {
       thrust::host_vector<int32_t> h_data(col.size());
       CUDF_CUDA_TRY(cudaMemcpy(
         h_data.data(), col.data<int32_t>(), h_data.size() * sizeof(int32_t), cudaMemcpyDefault));
@@ -134,7 +135,7 @@ TYPED_TEST(Sort, WithNullMin)
     EXPECT_EQ(h_exp.front(), h_got.front());
 
     // Run test for sort and sort_by_key
-    fixed_width_column_wrapper<int32_t> expected_for_bool{{2, 0, 3, 1, 4}};
+    cudf::test::fixed_width_column_wrapper<int32_t> expected_for_bool{{2, 0, 3, 1, 4}};
     run_sort_test(input, expected_for_bool, column_order);
   }
 }
@@ -143,23 +144,25 @@ TYPED_TEST(Sort, WithMixedNullOrder)
 {
   using T = TypeParam;
 
-  fixed_width_column_wrapper<T> col1{{5, 4, 3, 5, 8}, {0, 0, 1, 1, 0}};
-  strings_column_wrapper col2({"d", "e", "a", "d", "k"}, {0, 1, 0, 0, 1});
-  fixed_width_column_wrapper<T> col3{{10, 40, 70, 5, 2}, {1, 0, 1, 0, 1}};
-  table_view input{{col1, col2, col3}};
+  cudf::test::fixed_width_column_wrapper<T> col1{{5, 4, 3, 5, 8}, {0, 0, 1, 1, 0}};
+  cudf::test::strings_column_wrapper col2({"d", "e", "a", "d", "k"}, {0, 1, 0, 0, 1});
+  cudf::test::fixed_width_column_wrapper<T> col3{{10, 40, 70, 5, 2}, {1, 0, 1, 0, 1}};
+  cudf::table_view input{{col1, col2, col3}};
 
-  fixed_width_column_wrapper<int32_t> expected{{2, 3, 0, 1, 4}};
-  std::vector<order> column_order{order::ASCENDING, order::ASCENDING, order::ASCENDING};
-  std::vector<null_order> null_precedence{null_order::AFTER, null_order::BEFORE, null_order::AFTER};
+  cudf::test::fixed_width_column_wrapper<int32_t> expected{{2, 3, 0, 1, 4}};
+  std::vector<cudf::order> column_order{
+    cudf::order::ASCENDING, cudf::order::ASCENDING, cudf::order::ASCENDING};
+  std::vector<cudf::null_order> null_precedence{
+    cudf::null_order::AFTER, cudf::null_order::BEFORE, cudf::null_order::AFTER};
 
-  auto got = sorted_order(input, column_order, null_precedence);
+  auto got = cudf::sorted_order(input, column_order, null_precedence);
 
   if (!std::is_same_v<T, bool>) {
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view());
   } else {
     // for bools only validate that the null element landed at the front, since
     // the rest of the values are equivalent and yields random sorted order.
-    auto to_host = [](column_view const& col) {
+    auto to_host = [](cudf::column_view const& col) {
       thrust::host_vector<int32_t> h_data(col.size());
       CUDF_CUDA_TRY(cudaMemcpy(
         h_data.data(), col.data<int32_t>(), h_data.size() * sizeof(int32_t), cudaMemcpyDefault));
@@ -178,15 +181,16 @@ TYPED_TEST(Sort, WithAllValid)
 {
   using T = TypeParam;
 
-  fixed_width_column_wrapper<T> col1{{5, 4, 3, 5, 8}};
-  strings_column_wrapper col2({"d", "e", "a", "d", "k"});
-  fixed_width_column_wrapper<T> col3{{10, 40, 70, 5, 2}};
-  table_view input{{col1, col2, col3}};
+  cudf::test::fixed_width_column_wrapper<T> col1{{5, 4, 3, 5, 8}};
+  cudf::test::strings_column_wrapper col2({"d", "e", "a", "d", "k"});
+  cudf::test::fixed_width_column_wrapper<T> col3{{10, 40, 70, 5, 2}};
+  cudf::table_view input{{col1, col2, col3}};
 
-  fixed_width_column_wrapper<int32_t> expected{{2, 1, 0, 3, 4}};
-  std::vector<order> column_order{order::ASCENDING, order::ASCENDING, order::DESCENDING};
+  cudf::test::fixed_width_column_wrapper<int32_t> expected{{2, 1, 0, 3, 4}};
+  std::vector<cudf::order> column_order{
+    cudf::order::ASCENDING, cudf::order::ASCENDING, cudf::order::DESCENDING};
 
-  auto got = sorted_order(input, column_order);
+  auto got = cudf::sorted_order(input, column_order);
 
   // Skip validating bools order. Valid true bools are all
   // equivalent, and yield random order after thrust::sort
@@ -197,7 +201,7 @@ TYPED_TEST(Sort, WithAllValid)
     run_sort_test(input, expected, column_order);
   } else {
     // Run test for sort and sort_by_key
-    fixed_width_column_wrapper<int32_t> expected_for_bool{{2, 0, 3, 1, 4}};
+    cudf::test::fixed_width_column_wrapper<int32_t> expected_for_bool{{2, 0, 3, 1, 4}};
     run_sort_test(input, expected_for_bool, column_order);
   }
 }
@@ -224,16 +228,18 @@ TYPED_TEST(Sort, WithStructColumn)
   auto struct_col_view{struct_col->view()};
   EXPECT_EQ(num_rows, struct_col->size());
 
-  fixed_width_column_wrapper<T> col1{{5, 4, 3, 5, 8, 9}};
-  strings_column_wrapper col2({"d", "e", "a", "d", "k", "a"});
-  fixed_width_column_wrapper<T> col3{{10, 40, 70, 5, 2, 20}};
-  table_view input{{col1, col2, col3, struct_col_view}};
+  cudf::test::fixed_width_column_wrapper<T> col1{{5, 4, 3, 5, 8, 9}};
+  cudf::test::strings_column_wrapper col2({"d", "e", "a", "d", "k", "a"});
+  cudf::test::fixed_width_column_wrapper<T> col3{{10, 40, 70, 5, 2, 20}};
+  cudf::table_view input{{col1, col2, col3, struct_col_view}};
 
-  fixed_width_column_wrapper<int32_t> expected{{2, 1, 0, 3, 4, 5}};
-  std::vector<order> column_order{
-    order::ASCENDING, order::ASCENDING, order::DESCENDING, order::ASCENDING};
+  cudf::test::fixed_width_column_wrapper<int32_t> expected{{2, 1, 0, 3, 4, 5}};
+  std::vector<cudf::order> column_order{cudf::order::ASCENDING,
+                                        cudf::order::ASCENDING,
+                                        cudf::order::DESCENDING,
+                                        cudf::order::ASCENDING};
 
-  auto got = sorted_order(input, column_order);
+  auto got = cudf::sorted_order(input, column_order);
 
   // Skip validating bools order. Valid true bools are all
   // equivalent, and yield random order after thrust::sort
@@ -244,7 +250,7 @@ TYPED_TEST(Sort, WithStructColumn)
     run_sort_test(input, expected, column_order);
   } else {
     // Run test for sort and sort_by_key
-    fixed_width_column_wrapper<int32_t> expected_for_bool{{2, 5, 3, 0, 1, 4}};
+    cudf::test::fixed_width_column_wrapper<int32_t> expected_for_bool{{2, 5, 3, 0, 1, 4}};
     run_sort_test(input, expected_for_bool, column_order);
   }
 }
@@ -271,14 +277,15 @@ TYPED_TEST(Sort, WithNestedStructColumn)
 
   auto struct_col_view{struct_col2->view()};
 
-  fixed_width_column_wrapper<T> col1{{6, 6, 6, 6, 6, 6}};
-  fixed_width_column_wrapper<T> col2{{1, 1, 1, 2, 2, 2}};
-  table_view input{{col1, col2, struct_col_view}};
+  cudf::test::fixed_width_column_wrapper<T> col1{{6, 6, 6, 6, 6, 6}};
+  cudf::test::fixed_width_column_wrapper<T> col2{{1, 1, 1, 2, 2, 2}};
+  cudf::table_view input{{col1, col2, struct_col_view}};
 
-  fixed_width_column_wrapper<int32_t> expected{{3, 5, 4, 2, 1, 0}};
-  std::vector<order> column_order{order::ASCENDING, order::DESCENDING, order::ASCENDING};
+  cudf::test::fixed_width_column_wrapper<int32_t> expected{{3, 5, 4, 2, 1, 0}};
+  std::vector<cudf::order> column_order{
+    cudf::order::ASCENDING, cudf::order::DESCENDING, cudf::order::ASCENDING};
 
-  auto got = sorted_order(input, column_order);
+  auto got = cudf::sorted_order(input, column_order);
 
   // Skip validating bools order. Valid true bools are all
   // equivalent, and yield random order after thrust::sort
@@ -289,7 +296,7 @@ TYPED_TEST(Sort, WithNestedStructColumn)
     run_sort_test(input, expected, column_order);
   } else {
     // Run test for sort and sort_by_key
-    fixed_width_column_wrapper<int32_t> expected_for_bool{{2, 5, 1, 3, 4, 0}};
+    cudf::test::fixed_width_column_wrapper<int32_t> expected_for_bool{{2, 5, 1, 3, 4, 0}};
     run_sort_test(input, expected_for_bool, column_order);
   }
 }
@@ -346,7 +353,7 @@ TYPED_TEST(Sort, WithNullableStructColumn)
     auto s1 = make_struct(std::move(s1_children), s1_mask);
 
     auto expect = fwcw{4, 5, 7, 3, 2, 0, 6, 1, 8};
-    run_sort_test(table_view({s1->view()}), expect);
+    run_sort_test(cudf::table_view({s1->view()}), expect);
   }
   { /*
         /+-------------+
@@ -384,7 +391,7 @@ TYPED_TEST(Sort, WithNullableStructColumn)
     auto s12 = make_struct(std::move(s12_children), s1_mask);
 
     auto expect = fwcw{4, 5, 7, 0, 6, 1, 2, 3, 8};
-    run_sort_test(table_view({s12->view()}), expect);
+    run_sort_test(cudf::table_view({s12->view()}), expect);
   }
 }
 
@@ -406,12 +413,12 @@ TYPED_TEST(Sort, WithSingleStructColumn)
   auto struct_col =
     cudf::test::structs_column_wrapper{{names_col, ages_col, is_human_col}, v}.release();
   auto struct_col_view{struct_col->view()};
-  table_view input{{struct_col_view}};
+  cudf::table_view input{{struct_col_view}};
 
-  fixed_width_column_wrapper<int32_t> expected{{2, 5, 1, 3, 4, 0}};
-  std::vector<order> column_order{order::ASCENDING};
+  cudf::test::fixed_width_column_wrapper<int32_t> expected{{2, 5, 1, 3, 4, 0}};
+  std::vector<cudf::order> column_order{cudf::order::ASCENDING};
 
-  auto got = sorted_order(input, column_order);
+  auto got = cudf::sorted_order(input, column_order);
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view());
 
@@ -443,13 +450,13 @@ TYPED_TEST(Sort, WithSlicedStructColumn)
   auto col2 =                           FWCW{{    1,     1,     0,     0,    0,    2,   1,   3}};
   auto col3 =                           FWCW{{    7,     8,     1,     1,    9,    5,   7,   3}};
   auto col1 = cudf::test::strings_column_wrapper{names.begin(), names.end(), string_valids.begin()};
-  auto struct_col = structs_column_wrapper{{col1, col2, col3}}.release();
+  auto struct_col = cudf::test::structs_column_wrapper{{col1, col2, col3}}.release();
   // clang-format on
   auto struct_col_view{struct_col->view()};
-  table_view input{{struct_col_view}};
-  auto sliced_columns = cudf::split(struct_col_view, std::vector<size_type>{3});
-  auto sliced_tables  = cudf::split(input, std::vector<size_type>{3});
-  std::vector<order> column_order{order::ASCENDING};
+  cudf::table_view input{{struct_col_view}};
+  auto sliced_columns = cudf::split(struct_col_view, std::vector<cudf::size_type>{3});
+  auto sliced_tables  = cudf::split(input, std::vector<cudf::size_type>{3});
+  std::vector<cudf::order> column_order{cudf::order::ASCENDING};
   /*
         asce_null_first   sliced[3:]
       /+-------------+
@@ -467,30 +474,30 @@ TYPED_TEST(Sort, WithSlicedStructColumn)
   */
 
   // normal
-  fixed_width_column_wrapper<int32_t> expected{{7, 2, 4, 3, 6, 0, 1, 5}};
-  auto got = sorted_order(input, column_order);
+  cudf::test::fixed_width_column_wrapper<int32_t> expected{{7, 2, 4, 3, 6, 0, 1, 5}};
+  auto got = cudf::sorted_order(input, column_order);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view());
   // Run test for sort and sort_by_key
   run_sort_test(input, expected, column_order);
 
   // table with sliced column
-  table_view input2{{sliced_columns[1]}};
-  fixed_width_column_wrapper<int32_t> expected2{{4, 1, 0, 3, 2}};
-  got = sorted_order(input2, column_order);
+  cudf::table_view input2{{sliced_columns[1]}};
+  cudf::test::fixed_width_column_wrapper<int32_t> expected2{{4, 1, 0, 3, 2}};
+  got = cudf::sorted_order(input2, column_order);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, got->view());
   // Run test for sort and sort_by_key
   run_sort_test(input2, expected2, column_order);
 
   // sliced table[1]
-  fixed_width_column_wrapper<int32_t> expected3{{4, 1, 0, 3, 2}};
-  got = sorted_order(sliced_tables[1], column_order);
+  cudf::test::fixed_width_column_wrapper<int32_t> expected3{{4, 1, 0, 3, 2}};
+  got = cudf::sorted_order(sliced_tables[1], column_order);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected3, got->view());
   // Run test for sort and sort_by_key
   run_sort_test(sliced_tables[1], expected3, column_order);
 
   // sliced table[0]
-  fixed_width_column_wrapper<int32_t> expected4{{2, 0, 1}};
-  got = sorted_order(sliced_tables[0], column_order);
+  cudf::test::fixed_width_column_wrapper<int32_t> expected4{{2, 0, 1}};
+  got = cudf::sorted_order(sliced_tables[0], column_order);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected4, got->view());
   // Run test for sort and sort_by_key
   run_sort_test(sliced_tables[0], expected4, column_order);
@@ -507,25 +514,25 @@ TYPED_TEST(Sort, SlicedColumns)
   auto col2 =                           FWCW{{    7,     8,     1,     1,    9,    5,   7,   3}};
   auto col1 = cudf::test::strings_column_wrapper{names.begin(), names.end(), string_valids.begin()};
   // clang-format on
-  table_view input{{col1, col2}};
-  auto sliced_columns1 = cudf::split(col1, std::vector<size_type>{3});
-  auto sliced_columns2 = cudf::split(col1, std::vector<size_type>{3});
-  auto sliced_tables   = cudf::split(input, std::vector<size_type>{3});
-  std::vector<order> column_order{order::ASCENDING, order::ASCENDING};
+  cudf::table_view input{{col1, col2}};
+  auto sliced_columns1 = cudf::split(col1, std::vector<cudf::size_type>{3});
+  auto sliced_columns2 = cudf::split(col1, std::vector<cudf::size_type>{3});
+  auto sliced_tables   = cudf::split(input, std::vector<cudf::size_type>{3});
+  std::vector<cudf::order> column_order{cudf::order::ASCENDING, cudf::order::ASCENDING};
 
   // normal
-  // fixed_width_column_wrapper<int32_t> expected{{2, 3, 7, 5, 0, 6, 1, 4}};
-  fixed_width_column_wrapper<int32_t> expected{{7, 2, 4, 3, 6, 0, 1, 5}};
-  auto got = sorted_order(input, column_order);
+  // cudf::test::fixed_width_column_wrapper<int32_t> expected{{2, 3, 7, 5, 0, 6, 1, 4}};
+  cudf::test::fixed_width_column_wrapper<int32_t> expected{{7, 2, 4, 3, 6, 0, 1, 5}};
+  auto got = cudf::sorted_order(input, column_order);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view());
   // Run test for sort and sort_by_key
   run_sort_test(input, expected, column_order);
 
   // table with sliced column
-  table_view input2{{sliced_columns1[1], sliced_columns2[1]}};
-  // fixed_width_column_wrapper<int32_t> expected2{{0, 4, 2, 3, 1}};
-  fixed_width_column_wrapper<int32_t> expected2{{4, 1, 0, 3, 2}};
-  got = sorted_order(input2, column_order);
+  cudf::table_view input2{{sliced_columns1[1], sliced_columns2[1]}};
+  // cudf::test::fixed_width_column_wrapper<int32_t> expected2{{0, 4, 2, 3, 1}};
+  cudf::test::fixed_width_column_wrapper<int32_t> expected2{{4, 1, 0, 3, 2}};
+  got = cudf::sorted_order(input2, column_order);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, got->view());
   // Run test for sort and sort_by_key
   run_sort_test(input2, expected2, column_order);
@@ -572,37 +579,37 @@ TYPED_TEST(Sort, WithStructColumnCombinations)
   */
   // clang-format on
   auto struct_col_view{struct_col->view()};
-  table_view input{{struct_col_view}};
-  std::vector<order> column_order1{order::DESCENDING};
+  cudf::table_view input{{struct_col_view}};
+  std::vector<cudf::order> column_order1{cudf::order::DESCENDING};
 
   // desc_nulls_first
-  fixed_width_column_wrapper<int32_t> expected1{{2, 4, 3, 5, 6, 7, 1, 0}};
-  auto got = sorted_order(input, column_order1, {null_order::AFTER});
+  cudf::test::fixed_width_column_wrapper<int32_t> expected1{{2, 4, 3, 5, 6, 7, 1, 0}};
+  auto got = cudf::sorted_order(input, column_order1, {cudf::null_order::AFTER});
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, got->view());
   // Run test for sort and sort_by_key
-  run_sort_test(input, expected1, column_order1, {null_order::AFTER});
+  run_sort_test(input, expected1, column_order1, {cudf::null_order::AFTER});
 
   // desc_nulls_last
-  fixed_width_column_wrapper<int32_t> expected2{{1, 0, 6, 7, 3, 5, 2, 4}};
-  got = sorted_order(input, column_order1, {null_order::BEFORE});
+  cudf::test::fixed_width_column_wrapper<int32_t> expected2{{1, 0, 6, 7, 3, 5, 2, 4}};
+  got = cudf::sorted_order(input, column_order1, {cudf::null_order::BEFORE});
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, got->view());
   // Run test for sort and sort_by_key
-  run_sort_test(input, expected2, column_order1, {null_order::BEFORE});
+  run_sort_test(input, expected2, column_order1, {cudf::null_order::BEFORE});
 
   // asce_nulls_first
-  std::vector<order> column_order2{order::ASCENDING};
-  fixed_width_column_wrapper<int32_t> expected3{{2, 4, 3, 5, 7, 6, 0, 1}};
-  got = sorted_order(input, column_order2, {null_order::BEFORE});
+  std::vector<cudf::order> column_order2{cudf::order::ASCENDING};
+  cudf::test::fixed_width_column_wrapper<int32_t> expected3{{2, 4, 3, 5, 7, 6, 0, 1}};
+  got = cudf::sorted_order(input, column_order2, {cudf::null_order::BEFORE});
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected3, got->view());
   // Run test for sort and sort_by_key
-  run_sort_test(input, expected3, column_order2, {null_order::BEFORE});
+  run_sort_test(input, expected3, column_order2, {cudf::null_order::BEFORE});
 
   // asce_nulls_last
-  fixed_width_column_wrapper<int32_t> expected4{{0, 1, 7, 6, 3, 5, 2, 4}};
-  got = sorted_order(input, column_order2, {null_order::AFTER});
+  cudf::test::fixed_width_column_wrapper<int32_t> expected4{{0, 1, 7, 6, 3, 5, 2, 4}};
+  got = cudf::sorted_order(input, column_order2, {cudf::null_order::AFTER});
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected4, got->view());
   // Run test for sort and sort_by_key
-  run_sort_test(input, expected4, column_order2, {null_order::AFTER});
+  run_sort_test(input, expected4, column_order2, {cudf::null_order::AFTER});
 }
 
 TYPED_TEST(Sort, WithStructColumnCombinationsWithoutNulls)
@@ -645,93 +652,94 @@ TYPED_TEST(Sort, WithStructColumnCombinationsWithoutNulls)
   */
   // clang-format on
   auto struct_col_view{struct_col->view()};
-  table_view input{{struct_col_view}};
-  std::vector<order> column_order{order::DESCENDING};
+  cudf::table_view input{{struct_col_view}};
+  std::vector<cudf::order> column_order{cudf::order::DESCENDING};
 
   // desc_nulls_first
   auto const expected1 = []() {
     if constexpr (std::is_same_v<T, bool>) {
-      return fixed_width_column_wrapper<int32_t>{{3, 5, 6, 7, 1, 2, 4, 0}};
+      return cudf::test::fixed_width_column_wrapper<int32_t>{{3, 5, 6, 7, 1, 2, 4, 0}};
     }
-    return fixed_width_column_wrapper<int32_t>{{3, 5, 6, 7, 2, 4, 1, 0}};
+    return cudf::test::fixed_width_column_wrapper<int32_t>{{3, 5, 6, 7, 2, 4, 1, 0}};
   }();
-  auto got = sorted_order(input, column_order, {null_order::AFTER});
+  auto got = cudf::sorted_order(input, column_order, {cudf::null_order::AFTER});
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, got->view());
   // Run test for sort and sort_by_key
-  run_sort_test(input, expected1, column_order, {null_order::AFTER});
+  run_sort_test(input, expected1, column_order, {cudf::null_order::AFTER});
 
   // desc_nulls_last
-  fixed_width_column_wrapper<int32_t> expected2{{2, 4, 1, 0, 6, 7, 3, 5}};
-  got = sorted_order(input, column_order, {null_order::BEFORE});
+  cudf::test::fixed_width_column_wrapper<int32_t> expected2{{2, 4, 1, 0, 6, 7, 3, 5}};
+  got = cudf::sorted_order(input, column_order, {cudf::null_order::BEFORE});
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, got->view());
   // Run test for sort and sort_by_key
-  run_sort_test(input, expected2, column_order, {null_order::BEFORE});
+  run_sort_test(input, expected2, column_order, {cudf::null_order::BEFORE});
 
   // asce_nulls_first
-  std::vector<order> column_order2{order::ASCENDING};
-  fixed_width_column_wrapper<int32_t> expected3{{3, 5, 7, 6, 0, 1, 2, 4}};
-  got = sorted_order(input, column_order2, {null_order::BEFORE});
+  std::vector<cudf::order> column_order2{cudf::order::ASCENDING};
+  cudf::test::fixed_width_column_wrapper<int32_t> expected3{{3, 5, 7, 6, 0, 1, 2, 4}};
+  got = cudf::sorted_order(input, column_order2, {cudf::null_order::BEFORE});
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected3, got->view());
   // Run test for sort and sort_by_key
-  run_sort_test(input, expected3, column_order2, {null_order::BEFORE});
+  run_sort_test(input, expected3, column_order2, {cudf::null_order::BEFORE});
 
   // asce_nulls_last
   auto const expected4 = []() {
     if constexpr (std::is_same_v<T, bool>) {
-      return fixed_width_column_wrapper<int32_t>{{0, 2, 4, 1, 7, 6, 3, 5}};
+      return cudf::test::fixed_width_column_wrapper<int32_t>{{0, 2, 4, 1, 7, 6, 3, 5}};
     }
-    return fixed_width_column_wrapper<int32_t>{{0, 1, 2, 4, 7, 6, 3, 5}};
+    return cudf::test::fixed_width_column_wrapper<int32_t>{{0, 1, 2, 4, 7, 6, 3, 5}};
   }();
-  got = sorted_order(input, column_order2, {null_order::AFTER});
+  got = cudf::sorted_order(input, column_order2, {cudf::null_order::AFTER});
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected4, got->view());
   // Run test for sort and sort_by_key
-  run_sort_test(input, expected4, column_order2, {null_order::AFTER});
+  run_sort_test(input, expected4, column_order2, {cudf::null_order::AFTER});
 }
 
-TYPED_TEST(Sort, MisMatchInColumnOrderSize)
+TYPED_TEST(Sort, MismatchInColumnOrderSize)
 {
   using T = TypeParam;
 
-  fixed_width_column_wrapper<T> col1{{5, 4, 3, 5, 8}};
-  strings_column_wrapper col2({"d", "e", "a", "d", "k"});
-  fixed_width_column_wrapper<T> col3{{10, 40, 70, 5, 2}};
-  table_view input{{col1, col2, col3}};
+  cudf::test::fixed_width_column_wrapper<T> col1{{5, 4, 3, 5, 8}};
+  cudf::test::strings_column_wrapper col2({"d", "e", "a", "d", "k"});
+  cudf::test::fixed_width_column_wrapper<T> col3{{10, 40, 70, 5, 2}};
+  cudf::table_view input{{col1, col2, col3}};
 
-  std::vector<order> column_order{order::ASCENDING, order::DESCENDING};
+  std::vector<cudf::order> column_order{cudf::order::ASCENDING, cudf::order::DESCENDING};
 
-  EXPECT_THROW(sorted_order(input, column_order), logic_error);
-  EXPECT_THROW(sort(input, column_order), logic_error);
-  EXPECT_THROW(sort_by_key(input, input, column_order), logic_error);
+  EXPECT_THROW(cudf::sorted_order(input, column_order), cudf::logic_error);
+  EXPECT_THROW(cudf::sort(input, column_order), cudf::logic_error);
+  EXPECT_THROW(cudf::sort_by_key(input, input, column_order), cudf::logic_error);
 }
 
-TYPED_TEST(Sort, MisMatchInNullPrecedenceSize)
+TYPED_TEST(Sort, MismatchInNullPrecedenceSize)
 {
   using T = TypeParam;
 
-  fixed_width_column_wrapper<T> col1{{5, 4, 3, 5, 8}};
-  strings_column_wrapper col2({"d", "e", "a", "d", "k"});
-  fixed_width_column_wrapper<T> col3{{10, 40, 70, 5, 2}};
-  table_view input{{col1, col2, col3}};
+  cudf::test::fixed_width_column_wrapper<T> col1{{5, 4, 3, 5, 8}};
+  cudf::test::strings_column_wrapper col2({"d", "e", "a", "d", "k"});
+  cudf::test::fixed_width_column_wrapper<T> col3{{10, 40, 70, 5, 2}};
+  cudf::table_view input{{col1, col2, col3}};
 
-  std::vector<order> column_order{order::ASCENDING, order::DESCENDING, order::DESCENDING};
-  std::vector<null_order> null_precedence{null_order::AFTER, null_order::BEFORE};
+  std::vector<cudf::order> column_order{
+    cudf::order::ASCENDING, cudf::order::DESCENDING, cudf::order::DESCENDING};
+  std::vector<cudf::null_order> null_precedence{cudf::null_order::AFTER, cudf::null_order::BEFORE};
 
-  EXPECT_THROW(sorted_order(input, column_order, null_precedence), logic_error);
-  EXPECT_THROW(sort(input, column_order, null_precedence), logic_error);
-  EXPECT_THROW(sort_by_key(input, input, column_order, null_precedence), logic_error);
+  EXPECT_THROW(cudf::sorted_order(input, column_order, null_precedence), cudf::logic_error);
+  EXPECT_THROW(cudf::sort(input, column_order, null_precedence), cudf::logic_error);
+  EXPECT_THROW(cudf::sort_by_key(input, input, column_order, null_precedence), cudf::logic_error);
 }
 
 TYPED_TEST(Sort, ZeroSizedColumns)
 {
   using T = TypeParam;
 
-  fixed_width_column_wrapper<T> col1{};
-  table_view input{{col1}};
+  cudf::test::fixed_width_column_wrapper<T> col1{};
+  cudf::table_view input{{col1}};
 
-  fixed_width_column_wrapper<int32_t> expected{};
-  std::vector<order> column_order{order::ASCENDING};
+  cudf::test::fixed_width_column_wrapper<int32_t> expected{};
+  std::vector<cudf::order> column_order{cudf::order::ASCENDING};
 
-  auto got = sorted_order(input, column_order);
+  auto got = cudf::sorted_order(input, column_order);
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view());
 
@@ -837,33 +845,31 @@ TYPED_TEST(Sort, WithEmptyListColumn)
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expect, *result);
 }
 
-struct SortByKey : public BaseFixture {
+struct SortByKey : public cudf::test::BaseFixture {
 };
 
 TEST_F(SortByKey, ValueKeysSizeMismatch)
 {
   using T = int64_t;
 
-  fixed_width_column_wrapper<T> col1{{5, 4, 3, 5, 8}};
-  strings_column_wrapper col2({"d", "e", "a", "d", "k"});
-  fixed_width_column_wrapper<T> col3{{10, 40, 70, 5, 2}};
-  table_view values{{col1, col2, col3}};
+  cudf::test::fixed_width_column_wrapper<T> col1{{5, 4, 3, 5, 8}};
+  cudf::test::strings_column_wrapper col2({"d", "e", "a", "d", "k"});
+  cudf::test::fixed_width_column_wrapper<T> col3{{10, 40, 70, 5, 2}};
+  cudf::table_view values{{col1, col2, col3}};
 
-  fixed_width_column_wrapper<T> key_col{{5, 4, 3, 5}};
-  table_view keys{{key_col}};
+  cudf::test::fixed_width_column_wrapper<T> key_col{{5, 4, 3, 5}};
+  cudf::table_view keys{{key_col}};
 
-  EXPECT_THROW(sort_by_key(values, keys), logic_error);
+  EXPECT_THROW(cudf::sort_by_key(values, keys), cudf::logic_error);
 }
 
 template <typename T>
-struct FixedPointTestAllReps : public cudf::test::BaseFixture {
+struct SortFixedPointTest : public cudf::test::BaseFixture {
 };
 
-template <typename T>
-using wrapper = cudf::test::fixed_width_column_wrapper<T>;
-TYPED_TEST_SUITE(FixedPointTestAllReps, cudf::test::FixedPointTypes);
+TYPED_TEST_SUITE(SortFixedPointTest, cudf::test::FixedPointTypes);
 
-TYPED_TEST(FixedPointTestAllReps, FixedPointSortedOrderGather)
+TYPED_TEST(SortFixedPointTest, SortedOrderGather)
 {
   using namespace numeric;
   using decimalXX = TypeParam;
@@ -878,9 +884,12 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointSortedOrderGather)
   auto const index_vec  = std::vector<cudf::size_type>{2, 1, 0, 4, 3};
   auto const sorted_vec = std::vector<decimalXX>{ZERO, ONE, TWO, THREE, FOUR};
 
-  auto const input_col  = wrapper<decimalXX>(input_vec.begin(), input_vec.end());
-  auto const index_col  = wrapper<cudf::size_type>(index_vec.begin(), index_vec.end());
-  auto const sorted_col = wrapper<decimalXX>(sorted_vec.begin(), sorted_vec.end());
+  auto const input_col =
+    cudf::test::fixed_width_column_wrapper<decimalXX>(input_vec.begin(), input_vec.end());
+  auto const index_col =
+    cudf::test::fixed_width_column_wrapper<cudf::size_type>(index_vec.begin(), index_vec.end());
+  auto const sorted_col =
+    cudf::test::fixed_width_column_wrapper<decimalXX>(sorted_vec.begin(), sorted_vec.end());
 
   auto const sorted_table = cudf::table_view{{sorted_col}};
   auto const input_table  = cudf::table_view{{input_col}};
@@ -892,25 +901,27 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointSortedOrderGather)
   CUDF_TEST_EXPECT_TABLES_EQUAL(sorted_table, sorted->view());
 }
 
-struct SortCornerTest : public BaseFixture {
+struct SortCornerTest : public cudf::test::BaseFixture {
 };
 
 TEST_F(SortCornerTest, WithEmptyStructColumn)
 {
-  using int_col = fixed_width_column_wrapper<int32_t>;
+  using int_col = cudf::test::fixed_width_column_wrapper<int32_t>;
 
   // struct{}, int, int
   int_col col_for_mask{{0, 0, 0, 0, 0, 0}, {1, 0, 1, 1, 1, 1}};
-  auto null_mask  = cudf::copy_bitmask(col_for_mask.release()->view());
-  auto struct_col = cudf::make_structs_column(6, {}, UNKNOWN_NULL_COUNT, std::move(null_mask));
+  auto null_mask = cudf::copy_bitmask(col_for_mask.release()->view());
+  auto struct_col =
+    cudf::make_structs_column(6, {}, cudf::UNKNOWN_NULL_COUNT, std::move(null_mask));
 
   int_col col1{{1, 2, 3, 1, 2, 3}};
   int_col col2{{1, 1, 1, 2, 2, 2}};
-  table_view input{{struct_col->view(), col1, col2}};
+  cudf::table_view input{{struct_col->view(), col1, col2}};
 
   int_col expected{{1, 0, 3, 4, 2, 5}};
-  std::vector<order> column_order{order::ASCENDING, order::ASCENDING, order::ASCENDING};
-  auto got = sorted_order(input, column_order);
+  std::vector<cudf::order> column_order{
+    cudf::order::ASCENDING, cudf::order::ASCENDING, cudf::order::ASCENDING};
+  auto got = cudf::sorted_order(input, column_order);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view());
 
   // struct{struct{}, int}
@@ -920,17 +931,18 @@ TEST_F(SortCornerTest, WithEmptyStructColumn)
   child_columns.push_back(col3.release());
   auto struct_col2 =
     cudf::make_structs_column(6, std::move(child_columns), 0, rmm::device_buffer{});
-  table_view input2{{struct_col2->view()}};
+  cudf::table_view input2{{struct_col2->view()}};
 
   int_col expected2{{5, 4, 3, 2, 0, 1}};
-  auto got2 = sorted_order(input2, {order::DESCENDING});
+  auto got2 = cudf::sorted_order(input2, {cudf::order::DESCENDING});
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, got2->view());
 
   // struct{struct{}, struct{int}}
   int_col col_for_mask2{{0, 0, 0, 0, 0, 0}, {1, 0, 1, 1, 0, 1}};
   auto null_mask2 = cudf::copy_bitmask(col_for_mask2.release()->view());
   std::vector<std::unique_ptr<cudf::column>> child_columns2;
-  auto child_col_1 = cudf::make_structs_column(6, {}, UNKNOWN_NULL_COUNT, std::move(null_mask2));
+  auto child_col_1 =
+    cudf::make_structs_column(6, {}, cudf::UNKNOWN_NULL_COUNT, std::move(null_mask2));
   child_columns2.push_back(std::move(child_col_1));
   int_col col4{{5, 4, 3, 2, 1, 0}};
   std::vector<std::unique_ptr<cudf::column>> grand_child;
@@ -939,14 +951,26 @@ TEST_F(SortCornerTest, WithEmptyStructColumn)
   child_columns2.push_back(std::move(child_col_2));
   auto struct_col3 =
     cudf::make_structs_column(6, std::move(child_columns2), 0, rmm::device_buffer{});
-  table_view input3{{struct_col3->view()}};
+  cudf::table_view input3{{struct_col3->view()}};
 
   int_col expected3{{4, 1, 5, 3, 2, 0}};
-  auto got3 = sorted_order(input3, {order::ASCENDING});
+  auto got3 = cudf::sorted_order(input3, {cudf::order::ASCENDING});
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected3, got3->view());
 };
 
-}  // namespace test
-}  // namespace cudf
+using SortDouble = Sort<double>;
+TEST_F(SortDouble, InfinityAndNan)
+{
+  auto constexpr NaN = std::numeric_limits<double>::quiet_NaN();
+  auto constexpr Inf = std::numeric_limits<double>::infinity();
+
+  auto input = cudf::test::fixed_width_column_wrapper<double>(
+    {-0.0, -NaN, -NaN, NaN, Inf, -Inf, 7.0, 5.0, 6.0, NaN, Inf, -Inf, -NaN, -NaN, -0.0});
+  auto expected =  // -inf,-inf,-0,-0,5,6,7,inf,inf,-nan,-nan,nan,nan,-nan,-nan
+    cudf::test::fixed_width_column_wrapper<cudf::size_type>(
+      {5, 11, 0, 14, 7, 8, 6, 4, 10, 1, 2, 3, 9, 12, 13});
+  auto results = cudf::sorted_order(cudf::table_view({input}));
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected);
+}
 
 CUDF_TEST_PROGRAM_MAIN()
diff --git a/cpp/tests/sort/stable_sort_tests.cpp b/cpp/tests/sort/stable_sort_tests.cpp
index ee43c9e7b4b..57ad6361ad6 100644
--- a/cpp/tests/sort/stable_sort_tests.cpp
+++ b/cpp/tests/sort/stable_sort_tests.cpp
@@ -31,15 +31,13 @@
 #include <type_traits>
 #include <vector>
 
-namespace cudf {
-namespace test {
-void run_stable_sort_test(table_view input,
-                          column_view expected_sorted_indices,
-                          std::vector<order> column_order         = {},
-                          std::vector<null_order> null_precedence = {})
+void run_stable_sort_test(cudf::table_view input,
+                          cudf::column_view expected_sorted_indices,
+                          std::vector<cudf::order> column_order         = {},
+                          std::vector<cudf::null_order> null_precedence = {})
 {
-  auto got_sort_by_key_table      = sort_by_key(input, input, column_order, null_precedence);
-  auto expected_sort_by_key_table = gather(input, expected_sorted_indices);
+  auto got_sort_by_key_table      = cudf::sort_by_key(input, input, column_order, null_precedence);
+  auto expected_sort_by_key_table = cudf::gather(input, expected_sorted_indices);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected_sort_by_key_table->view(), got_sort_by_key_table->view());
 }
@@ -48,7 +46,7 @@ using TestTypes = cudf::test::Concat<cudf::test::NumericTypes,  // include integ
                                      cudf::test::ChronoTypes>;  // include timestamps and durations
 
 template <typename T>
-struct StableSort : public BaseFixture {
+struct StableSort : public cudf::test::BaseFixture {
 };
 
 TYPED_TEST_SUITE(StableSort, TestTypes);
@@ -58,14 +56,16 @@ TYPED_TEST(StableSort, MixedNullOrder)
   using T = TypeParam;
   using R = int32_t;
 
-  fixed_width_column_wrapper<T> col1({0, 1, 1, 0, 0, 1, 0, 1}, {0, 1, 1, 1, 1, 1, 1, 1});
-  strings_column_wrapper col2({"2", "a", "b", "x", "k", "a", "x", "a"}, {1, 1, 1, 1, 0, 1, 1, 1});
+  cudf::test::fixed_width_column_wrapper<T> col1({0, 1, 1, 0, 0, 1, 0, 1},
+                                                 {0, 1, 1, 1, 1, 1, 1, 1});
+  cudf::test::strings_column_wrapper col2({"2", "a", "b", "x", "k", "a", "x", "a"},
+                                          {1, 1, 1, 1, 0, 1, 1, 1});
 
-  fixed_width_column_wrapper<R> expected{{4, 3, 6, 1, 5, 7, 2, 0}};
+  cudf::test::fixed_width_column_wrapper<R> expected{{4, 3, 6, 1, 5, 7, 2, 0}};
 
-  auto got = stable_sorted_order(table_view({col1, col2}),
-                                 {order::ASCENDING, order::ASCENDING},
-                                 {null_order::AFTER, null_order::BEFORE});
+  auto got = cudf::stable_sorted_order(cudf::table_view({col1, col2}),
+                                       {cudf::order::ASCENDING, cudf::order::ASCENDING},
+                                       {cudf::null_order::AFTER, cudf::null_order::BEFORE});
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view());
 }
@@ -74,16 +74,18 @@ TYPED_TEST(StableSort, WithNullMax)
 {
   using T = TypeParam;
 
-  fixed_width_column_wrapper<T> col1{{5, 4, 3, 5, 8, 5}, {1, 1, 0, 1, 1, 1}};
-  strings_column_wrapper col2({"d", "e", "a", "d", "k", "d"}, {1, 1, 0, 1, 1, 1});
-  fixed_width_column_wrapper<T> col3{{10, 40, 70, 10, 2, 10}, {1, 1, 0, 1, 1, 1}};
-  table_view input{{col1, col2, col3}};
+  cudf::test::fixed_width_column_wrapper<T> col1{{5, 4, 3, 5, 8, 5}, {1, 1, 0, 1, 1, 1}};
+  cudf::test::strings_column_wrapper col2({"d", "e", "a", "d", "k", "d"}, {1, 1, 0, 1, 1, 1});
+  cudf::test::fixed_width_column_wrapper<T> col3{{10, 40, 70, 10, 2, 10}, {1, 1, 0, 1, 1, 1}};
+  cudf::table_view input{{col1, col2, col3}};
 
-  fixed_width_column_wrapper<int32_t> expected{{1, 0, 3, 5, 4, 2}};
-  std::vector<order> column_order{order::ASCENDING, order::ASCENDING, order::DESCENDING};
-  std::vector<null_order> null_precedence{null_order::AFTER, null_order::AFTER, null_order::AFTER};
+  cudf::test::fixed_width_column_wrapper<int32_t> expected{{1, 0, 3, 5, 4, 2}};
+  std::vector<cudf::order> column_order{
+    cudf::order::ASCENDING, cudf::order::ASCENDING, cudf::order::DESCENDING};
+  std::vector<cudf::null_order> null_precedence{
+    cudf::null_order::AFTER, cudf::null_order::AFTER, cudf::null_order::AFTER};
 
-  auto got = stable_sorted_order(input, column_order, null_precedence);
+  auto got = cudf::stable_sorted_order(input, column_order, null_precedence);
 
   if (not std::is_same_v<T, bool>) {
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view());
@@ -92,7 +94,7 @@ TYPED_TEST(StableSort, WithNullMax)
   } else {
     // for bools only validate that the null element landed at the back, since
     // the rest of the values are equivalent and yields random sorted order.
-    auto to_host = [](column_view const& col) {
+    auto to_host = [](cudf::column_view const& col) {
       thrust::host_vector<int32_t> h_data(col.size());
       CUDF_CUDA_TRY(cudaMemcpy(
         h_data.data(), col.data<int32_t>(), h_data.size() * sizeof(int32_t), cudaMemcpyDefault));
@@ -102,7 +104,7 @@ TYPED_TEST(StableSort, WithNullMax)
     thrust::host_vector<int32_t> h_got = to_host(got->view());
     EXPECT_EQ(h_exp[h_exp.size() - 1], h_got[h_got.size() - 1]);
 
-    fixed_width_column_wrapper<int32_t> expected_for_bool{{0, 3, 5, 1, 4, 2}};
+    cudf::test::fixed_width_column_wrapper<int32_t> expected_for_bool{{0, 3, 5, 1, 4, 2}};
     run_stable_sort_test(input, expected_for_bool, column_order, null_precedence);
   }
 }
@@ -111,15 +113,16 @@ TYPED_TEST(StableSort, WithNullMin)
 {
   using T = TypeParam;
 
-  fixed_width_column_wrapper<T> col1{{5, 4, 3, 5, 8}, {1, 1, 0, 1, 1}};
-  strings_column_wrapper col2({"d", "e", "a", "d", "k"}, {1, 1, 0, 1, 1});
-  fixed_width_column_wrapper<T> col3{{10, 40, 70, 10, 2}, {1, 1, 0, 1, 1}};
-  table_view input{{col1, col2, col3}};
+  cudf::test::fixed_width_column_wrapper<T> col1{{5, 4, 3, 5, 8}, {1, 1, 0, 1, 1}};
+  cudf::test::strings_column_wrapper col2({"d", "e", "a", "d", "k"}, {1, 1, 0, 1, 1});
+  cudf::test::fixed_width_column_wrapper<T> col3{{10, 40, 70, 10, 2}, {1, 1, 0, 1, 1}};
+  cudf::table_view input{{col1, col2, col3}};
 
-  fixed_width_column_wrapper<int32_t> expected{{2, 1, 0, 3, 4}};
-  std::vector<order> column_order{order::ASCENDING, order::ASCENDING, order::DESCENDING};
+  cudf::test::fixed_width_column_wrapper<int32_t> expected{{2, 1, 0, 3, 4}};
+  std::vector<cudf::order> column_order{
+    cudf::order::ASCENDING, cudf::order::ASCENDING, cudf::order::DESCENDING};
 
-  auto got = stable_sorted_order(input, column_order);
+  auto got = cudf::stable_sorted_order(input, column_order);
 
   if (!std::is_same_v<T, bool>) {
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view());
@@ -128,7 +131,7 @@ TYPED_TEST(StableSort, WithNullMin)
   } else {
     // for bools only validate that the null element landed at the front, since
     // the rest of the values are equivalent and yields random sorted order.
-    auto to_host = [](column_view const& col) {
+    auto to_host = [](cudf::column_view const& col) {
       thrust::host_vector<int32_t> h_data(col.size());
       CUDF_CUDA_TRY(cudaMemcpy(
         h_data.data(), col.data<int32_t>(), h_data.size() * sizeof(int32_t), cudaMemcpyDefault));
@@ -138,7 +141,7 @@ TYPED_TEST(StableSort, WithNullMin)
     thrust::host_vector<int32_t> h_got = to_host(got->view());
     EXPECT_EQ(h_exp.front(), h_got.front());
 
-    fixed_width_column_wrapper<int32_t> expected_for_bool{{2, 0, 3, 1, 4}};
+    cudf::test::fixed_width_column_wrapper<int32_t> expected_for_bool{{2, 0, 3, 1, 4}};
     run_stable_sort_test(input, expected_for_bool, column_order);
   }
 }
@@ -147,15 +150,16 @@ TYPED_TEST(StableSort, WithAllValid)
 {
   using T = TypeParam;
 
-  fixed_width_column_wrapper<T> col1{{5, 4, 3, 5, 8}};
-  strings_column_wrapper col2({"d", "e", "a", "d", "k"});
-  fixed_width_column_wrapper<T> col3{{10, 40, 70, 10, 2}};
-  table_view input{{col1, col2, col3}};
+  cudf::test::fixed_width_column_wrapper<T> col1{{5, 4, 3, 5, 8}};
+  cudf::test::strings_column_wrapper col2({"d", "e", "a", "d", "k"});
+  cudf::test::fixed_width_column_wrapper<T> col3{{10, 40, 70, 10, 2}};
+  cudf::table_view input{{col1, col2, col3}};
 
-  fixed_width_column_wrapper<int32_t> expected{{2, 1, 0, 3, 4}};
-  std::vector<order> column_order{order::ASCENDING, order::ASCENDING, order::DESCENDING};
+  cudf::test::fixed_width_column_wrapper<int32_t> expected{{2, 1, 0, 3, 4}};
+  std::vector<cudf::order> column_order{
+    cudf::order::ASCENDING, cudf::order::ASCENDING, cudf::order::DESCENDING};
 
-  auto got = stable_sorted_order(input, column_order);
+  auto got = cudf::stable_sorted_order(input, column_order);
 
   // Skip validating bools order. Valid true bools are all
   // equivalent, and yield random order after thrust::sort
@@ -164,7 +168,7 @@ TYPED_TEST(StableSort, WithAllValid)
 
     run_stable_sort_test(input, expected, column_order);
   } else {
-    fixed_width_column_wrapper<int32_t> expected_for_bool{{2, 0, 3, 1, 4}};
+    cudf::test::fixed_width_column_wrapper<int32_t> expected_for_bool{{2, 0, 3, 1, 4}};
     run_stable_sort_test(input, expected_for_bool, column_order);
   }
 }
@@ -173,66 +177,68 @@ TYPED_TEST(StableSort, MisMatchInColumnOrderSize)
 {
   using T = TypeParam;
 
-  fixed_width_column_wrapper<T> col1{{5, 4, 3, 5, 8}};
-  strings_column_wrapper col2({"d", "e", "a", "d", "k"});
-  fixed_width_column_wrapper<T> col3{{10, 40, 70, 5, 2}};
-  table_view input{{col1, col2, col3}};
+  cudf::test::fixed_width_column_wrapper<T> col1{{5, 4, 3, 5, 8}};
+  cudf::test::strings_column_wrapper col2({"d", "e", "a", "d", "k"});
+  cudf::test::fixed_width_column_wrapper<T> col3{{10, 40, 70, 5, 2}};
+  cudf::table_view input{{col1, col2, col3}};
 
-  std::vector<order> column_order{order::ASCENDING, order::DESCENDING};
+  std::vector<cudf::order> column_order{cudf::order::ASCENDING, cudf::order::DESCENDING};
 
-  EXPECT_THROW(stable_sorted_order(input, column_order), logic_error);
-  EXPECT_THROW(stable_sort_by_key(input, input, column_order), logic_error);
+  EXPECT_THROW(cudf::stable_sorted_order(input, column_order), cudf::logic_error);
+  EXPECT_THROW(cudf::stable_sort_by_key(input, input, column_order), cudf::logic_error);
 }
 
 TYPED_TEST(StableSort, MisMatchInNullPrecedenceSize)
 {
   using T = TypeParam;
 
-  fixed_width_column_wrapper<T> col1{{5, 4, 3, 5, 8}};
-  strings_column_wrapper col2({"d", "e", "a", "d", "k"});
-  fixed_width_column_wrapper<T> col3{{10, 40, 70, 5, 2}};
-  table_view input{{col1, col2, col3}};
+  cudf::test::fixed_width_column_wrapper<T> col1{{5, 4, 3, 5, 8}};
+  cudf::test::strings_column_wrapper col2({"d", "e", "a", "d", "k"});
+  cudf::test::fixed_width_column_wrapper<T> col3{{10, 40, 70, 5, 2}};
+  cudf::table_view input{{col1, col2, col3}};
 
-  std::vector<order> column_order{order::ASCENDING, order::DESCENDING, order::DESCENDING};
-  std::vector<null_order> null_precedence{null_order::AFTER, null_order::BEFORE};
+  std::vector<cudf::order> column_order{
+    cudf::order::ASCENDING, cudf::order::DESCENDING, cudf::order::DESCENDING};
+  std::vector<cudf::null_order> null_precedence{cudf::null_order::AFTER, cudf::null_order::BEFORE};
 
-  EXPECT_THROW(stable_sorted_order(input, column_order, null_precedence), logic_error);
-  EXPECT_THROW(stable_sort_by_key(input, input, column_order, null_precedence), logic_error);
+  EXPECT_THROW(cudf::stable_sorted_order(input, column_order, null_precedence), cudf::logic_error);
+  EXPECT_THROW(cudf::stable_sort_by_key(input, input, column_order, null_precedence),
+               cudf::logic_error);
 }
 
 TYPED_TEST(StableSort, ZeroSizedColumns)
 {
   using T = TypeParam;
 
-  fixed_width_column_wrapper<T> col1{};
-  table_view input{{col1}};
+  cudf::test::fixed_width_column_wrapper<T> col1{};
+  cudf::table_view input{{col1}};
 
-  fixed_width_column_wrapper<int32_t> expected{};
-  std::vector<order> column_order{order::ASCENDING};
+  cudf::test::fixed_width_column_wrapper<int32_t> expected{};
+  std::vector<cudf::order> column_order{cudf::order::ASCENDING};
 
-  auto got = stable_sorted_order(input, column_order);
+  auto got = cudf::stable_sorted_order(input, column_order);
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, got->view());
 
   run_stable_sort_test(input, expected, column_order);
 }
 
-struct StableSortByKey : public BaseFixture {
+struct StableSortByKey : public cudf::test::BaseFixture {
 };
 
 TEST_F(StableSortByKey, ValueKeysSizeMismatch)
 {
   using T = int64_t;
 
-  fixed_width_column_wrapper<T> col1{{5, 4, 3, 5, 8}};
-  strings_column_wrapper col2({"d", "e", "a", "d", "k"});
-  fixed_width_column_wrapper<T> col3{{10, 40, 70, 5, 2}};
-  table_view values{{col1, col2, col3}};
+  cudf::test::fixed_width_column_wrapper<T> col1{{5, 4, 3, 5, 8}};
+  cudf::test::strings_column_wrapper col2({"d", "e", "a", "d", "k"});
+  cudf::test::fixed_width_column_wrapper<T> col3{{10, 40, 70, 5, 2}};
+  cudf::table_view values{{col1, col2, col3}};
 
-  fixed_width_column_wrapper<T> key_col{{5, 4, 3, 5}};
-  table_view keys{{key_col}};
+  cudf::test::fixed_width_column_wrapper<T> key_col{{5, 4, 3, 5}};
+  cudf::table_view keys{{key_col}};
 
-  EXPECT_THROW(stable_sort_by_key(values, keys), logic_error);
+  EXPECT_THROW(cudf::stable_sort_by_key(values, keys), cudf::logic_error);
 }
 
 template <typename T>
@@ -272,5 +278,17 @@ TYPED_TEST(StableSortFixedPoint, FixedPointSortedOrderGather)
   CUDF_TEST_EXPECT_TABLES_EQUAL(sorted_table, sorted->view());
 }
 
-}  // namespace test
-}  // namespace cudf
+using StableSortDouble = StableSort<double>;
+TEST_F(StableSortDouble, InfinityAndNaN)
+{
+  auto constexpr NaN = std::numeric_limits<double>::quiet_NaN();
+  auto constexpr Inf = std::numeric_limits<double>::infinity();
+
+  auto input = cudf::test::fixed_width_column_wrapper<double>(
+    {-0.0, -NaN, -NaN, NaN, Inf, -Inf, 7.0, 5.0, 6.0, NaN, Inf, -Inf, -NaN, -NaN, -0.0});
+  auto expected =  // -inf,-inf,-0,-0,5,6,7,inf,inf,-nan,-nan,nan,nan,-nan,-nan
+    cudf::test::fixed_width_column_wrapper<cudf::size_type>(
+      {5, 11, 0, 14, 7, 8, 6, 4, 10, 1, 2, 3, 9, 12, 13});
+  auto results = stable_sorted_order(cudf::table_view({input}));
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected);
+}

From 9f8b93680ea81209ce34db6957cc0ef2791fa806 Mon Sep 17 00:00:00 2001
From: Tobias Ribizel <tribizel@nvidia.com>
Date: Sat, 15 Oct 2022 09:06:29 +0200
Subject: [PATCH 039/202] Handle `multibyte_split` byte_range out-of-bounds
 offsets on host (#11885)

In order to unify the interface for a future combined handling of byte ranges between read_csv and read_text, this PR replaces `cutoff_offset` with a plain integer again, and finds the first out-of-bounds offset on the host side instead.

Authors:
  - Tobias Ribizel (https://github.com/upsj)

Approvers:
  - Mike Wilson (https://github.com/hyperbolic2346)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/11885
---
 cpp/src/io/text/multibyte_split.cu | 195 ++++++++++++++---------------
 1 file changed, 97 insertions(+), 98 deletions(-)

diff --git a/cpp/src/io/text/multibyte_split.cu b/cpp/src/io/text/multibyte_split.cu
index 133c5fe9826..136eb8d24c6 100644
--- a/cpp/src/io/text/multibyte_split.cu
+++ b/cpp/src/io/text/multibyte_split.cu
@@ -31,7 +31,6 @@
 #include <cudf/utilities/default_stream.hpp>
 #include <cudf/utilities/span.hpp>
 
-#include <limits>
 #include <rmm/cuda_stream_pool.hpp>
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/exec_policy.hpp>
@@ -39,6 +38,8 @@
 #include <rmm/mr/device/per_device_resource.hpp>
 
 #include <thrust/copy.h>
+#include <thrust/find.h>
+#include <thrust/iterator/counting_iterator.h>
 #include <thrust/transform.h>
 
 #include <cub/block/block_load.cuh>
@@ -46,6 +47,8 @@
 
 #pragma GCC diagnostic pop
 
+#include <cstdint>
+#include <limits>
 #include <memory>
 #include <numeric>
 #include <optional>
@@ -160,6 +163,10 @@ struct PatternScan {
   }
 };
 
+// type aliases to distinguish between row offsets and character offsets
+using output_offset = int64_t;
+using byte_offset   = int64_t;
+
 // multibyte_split works by splitting up inputs in to 32 inputs (bytes) per thread, and transforming
 // them in to data structures called "multistates". these multistates are created by searching a
 // trie, but instead of a tradition trie where the search begins at a single node at the beginning,
@@ -170,35 +177,11 @@ struct PatternScan {
 // it begins in. From there, each thread can then take deterministic action. In this case, the
 // deterministic action is counting and outputting delimiter offsets when a delimiter is found.
 
-// This struct provides output offsets that are only incremented until a cutoff point.
-struct cutoff_offset {
-  // magnitude stores the offset, sign bit stores whether we are past the cutoff
-  int64_t value = 0;
-
-  constexpr cutoff_offset() = default;
-
-  constexpr cutoff_offset(int64_t offset, bool is_past_cutoff)
-    : value{is_past_cutoff ? -offset : offset}
-  {
-  }
-
-  [[nodiscard]] constexpr int64_t offset() const { return value < 0 ? -value : value; }
-
-  [[nodiscard]] constexpr bool is_past_end() { return value < 0; }
-
-  friend constexpr cutoff_offset operator+(cutoff_offset lhs, cutoff_offset rhs)
-  {
-    auto const past_end = lhs.is_past_end() or rhs.is_past_end();
-    auto const offset   = lhs.offset() + (lhs.is_past_end() ? 0 : rhs.offset());
-    return cutoff_offset{offset, past_end};
-  }
-};
-
 __global__ void multibyte_split_init_kernel(
   cudf::size_type base_tile_idx,
   cudf::size_type num_tiles,
   cudf::io::text::detail::scan_tile_state_view<multistate> tile_multistates,
-  cudf::io::text::detail::scan_tile_state_view<cutoff_offset> tile_output_offsets,
+  cudf::io::text::detail::scan_tile_state_view<output_offset> tile_output_offsets,
   cudf::io::text::detail::scan_tile_status status =
     cudf::io::text::detail::scan_tile_status::invalid)
 {
@@ -212,9 +195,9 @@ __global__ void multibyte_split_init_kernel(
 
 __global__ void multibyte_split_seed_kernel(
   cudf::io::text::detail::scan_tile_state_view<multistate> tile_multistates,
-  cudf::io::text::detail::scan_tile_state_view<cutoff_offset> tile_output_offsets,
+  cudf::io::text::detail::scan_tile_state_view<output_offset> tile_output_offsets,
   multistate tile_multistate_seed,
-  cutoff_offset tile_output_offset)
+  output_offset tile_output_offset)
 {
   auto const thread_idx = blockIdx.x * blockDim.x + threadIdx.x;
   if (thread_idx == 0) {
@@ -225,19 +208,18 @@ __global__ void multibyte_split_seed_kernel(
 
 __global__ __launch_bounds__(THREADS_PER_TILE) void multibyte_split_kernel(
   cudf::size_type base_tile_idx,
-  int64_t base_input_offset,
-  int64_t base_offset_offset,
+  byte_offset base_input_offset,
+  output_offset base_output_offset,
   cudf::io::text::detail::scan_tile_state_view<multistate> tile_multistates,
-  cudf::io::text::detail::scan_tile_state_view<cutoff_offset> tile_output_offsets,
+  cudf::io::text::detail::scan_tile_state_view<output_offset> tile_output_offsets,
   cudf::device_span<char const> delim,
   cudf::device_span<char const> chunk_input_chars,
-  int64_t byte_range_end,
-  cudf::split_device_span<int64_t> output_offsets)
+  cudf::split_device_span<byte_offset> row_offsets)
 {
   using InputLoad =
     cub::BlockLoad<char, THREADS_PER_TILE, ITEMS_PER_THREAD, cub::BLOCK_LOAD_WARP_TRANSPOSE>;
-  using OffsetScan         = cub::BlockScan<cutoff_offset, THREADS_PER_TILE>;
-  using OffsetScanCallback = cudf::io::text::detail::scan_tile_state_callback<cutoff_offset>;
+  using OffsetScan         = cub::BlockScan<output_offset, THREADS_PER_TILE>;
+  using OffsetScanCallback = cudf::io::text::detail::scan_tile_state_callback<output_offset>;
 
   __shared__ union {
     typename InputLoad::TempStorage input_load;
@@ -269,17 +251,15 @@ __global__ __launch_bounds__(THREADS_PER_TILE) void multibyte_split_kernel(
 
   // STEP 3: Flag matches
 
-  cutoff_offset thread_offset;
+  output_offset thread_offset{};
   uint32_t thread_match_mask[(ITEMS_PER_THREAD + 31) / 32]{};
 
   for (int32_t i = 0; i < ITEMS_PER_THREAD; i++) {
-    thread_multistate        = transition(thread_chars[i], thread_multistate, delim);
-    auto const thread_state  = thread_multistate.max_tail();
-    auto const is_match      = i < thread_input_size and thread_state == delim.size();
-    auto const match_end     = base_input_offset + thread_input_offset + i + 1;
-    auto const is_past_range = match_end >= byte_range_end;
+    thread_multistate       = transition(thread_chars[i], thread_multistate, delim);
+    auto const thread_state = thread_multistate.max_tail();
+    auto const is_match     = i < thread_input_size and thread_state == delim.size();
     thread_match_mask[i / 32] |= uint32_t{is_match} << (i % 32);
-    thread_offset = thread_offset + cutoff_offset{is_match, is_past_range};
+    thread_offset += output_offset{is_match};
   }
 
   // STEP 4: Scan flags to determine absolute thread output offset
@@ -293,29 +273,27 @@ __global__ __launch_bounds__(THREADS_PER_TILE) void multibyte_split_kernel(
 
   for (int32_t i = 0; i < ITEMS_PER_THREAD; i++) {
     auto const is_match = (thread_match_mask[i / 32] >> (i % 32)) & 1u;
-    if (is_match && !thread_offset.is_past_end()) {
-      auto const match_end     = base_input_offset + thread_input_offset + i + 1;
-      auto const is_past_range = match_end >= byte_range_end;
-      output_offsets[thread_offset.offset() - base_offset_offset] = match_end;
-      thread_offset = thread_offset + cutoff_offset{true, is_past_range};
+    if (is_match) {
+      auto const match_end = base_input_offset + thread_input_offset + i + 1;
+      row_offsets[thread_offset - base_output_offset] = match_end;
+      thread_offset++;
     }
   }
 }
 
 __global__ __launch_bounds__(THREADS_PER_TILE) void byte_split_kernel(
   cudf::size_type base_tile_idx,
-  int64_t base_input_offset,
-  int64_t base_offset_offset,
-  cudf::io::text::detail::scan_tile_state_view<cutoff_offset> tile_output_offsets,
+  byte_offset base_input_offset,
+  output_offset base_output_offset,
+  cudf::io::text::detail::scan_tile_state_view<output_offset> tile_output_offsets,
   char delim,
   cudf::device_span<char const> chunk_input_chars,
-  int64_t byte_range_end,
-  cudf::split_device_span<int64_t> output_offsets)
+  cudf::split_device_span<byte_offset> row_offsets)
 {
   using InputLoad =
     cub::BlockLoad<char, THREADS_PER_TILE, ITEMS_PER_THREAD, cub::BLOCK_LOAD_WARP_TRANSPOSE>;
-  using OffsetScan         = cub::BlockScan<cutoff_offset, THREADS_PER_TILE>;
-  using OffsetScanCallback = cudf::io::text::detail::scan_tile_state_callback<cutoff_offset>;
+  using OffsetScan         = cub::BlockScan<output_offset, THREADS_PER_TILE>;
+  using OffsetScanCallback = cudf::io::text::detail::scan_tile_state_callback<output_offset>;
 
   __shared__ union {
     typename InputLoad::TempStorage input_load;
@@ -338,15 +316,13 @@ __global__ __launch_bounds__(THREADS_PER_TILE) void byte_split_kernel(
 
   // STEP 2: Flag matches
 
-  cutoff_offset thread_offset;
+  output_offset thread_offset{};
   uint32_t thread_match_mask[(ITEMS_PER_THREAD + 31) / 32]{};
 
   for (int32_t i = 0; i < ITEMS_PER_THREAD; i++) {
-    auto const is_match      = i < thread_input_size and thread_chars[i] == delim;
-    auto const match_end     = base_input_offset + thread_input_offset + i + 1;
-    auto const is_past_range = match_end >= byte_range_end;
+    auto const is_match = i < thread_input_size and thread_chars[i] == delim;
     thread_match_mask[i / 32] |= uint32_t{is_match} << (i % 32);
-    thread_offset = thread_offset + cutoff_offset{is_match, is_past_range};
+    thread_offset += output_offset{is_match};
   }
 
   // STEP 3: Scan flags to determine absolute thread output offset
@@ -360,11 +336,10 @@ __global__ __launch_bounds__(THREADS_PER_TILE) void byte_split_kernel(
 
   for (int32_t i = 0; i < ITEMS_PER_THREAD; i++) {
     auto const is_match = (thread_match_mask[i / 32] >> (i % 32)) & 1u;
-    if (is_match && !thread_offset.is_past_end()) {
-      auto const match_end     = base_input_offset + thread_input_offset + i + 1;
-      auto const is_past_range = match_end >= byte_range_end;
-      output_offsets[thread_offset.offset() - base_offset_offset] = match_end;
-      thread_offset = thread_offset + cutoff_offset{true, is_past_range};
+    if (is_match) {
+      auto const match_end = base_input_offset + thread_input_offset + i + 1;
+      row_offsets[thread_offset - base_output_offset] = match_end;
+      thread_offset++;
     }
   }
 }
@@ -611,7 +586,7 @@ std::unique_ptr<cudf::column> multibyte_split(cudf::io::text::data_chunk_source
   // best when at least 32 more than max possible concurrent tiles, due to rolling `invalid`s
   auto num_tile_states  = std::max(32, TILES_PER_CHUNK * concurrency + 32);
   auto tile_multistates = scan_tile_state<multistate>(num_tile_states, stream);
-  auto tile_offsets     = scan_tile_state<cutoff_offset>(num_tile_states, stream);
+  auto tile_offsets     = scan_tile_state<output_offset>(num_tile_states, stream);
 
   multibyte_split_init_kernel<<<TILES_PER_CHUNK,
                                 THREADS_PER_TILE,
@@ -633,15 +608,15 @@ std::unique_ptr<cudf::column> multibyte_split(cudf::io::text::data_chunk_source
     tile_multistates,
     tile_offsets,
     multistate_seed,
-    {});
+    0);
 
   auto reader               = source.create_reader();
-  auto chunk_offset         = std::max<int64_t>(0, byte_range.offset() - delimiter.size());
+  auto chunk_offset         = std::max<byte_offset>(0, byte_range.offset() - delimiter.size());
   auto const byte_range_end = byte_range.offset() + byte_range.size();
   reader->skip_bytes(chunk_offset);
   // amortize output chunk allocations over 8 worst-case outputs. This limits the overallocation
   constexpr auto max_growth = 8;
-  output_builder<int64_t> offset_storage(ITEMS_PER_CHUNK, max_growth, stream);
+  output_builder<byte_offset> row_offset_storage(ITEMS_PER_CHUNK, max_growth, stream);
   output_builder<char> char_storage(ITEMS_PER_CHUNK, max_growth, stream);
 
   fork_stream(streams, stream);
@@ -653,22 +628,23 @@ std::unique_ptr<cudf::column> multibyte_split(cudf::io::text::data_chunk_source
   auto& scan_stream     = streams[1];
   auto chunk            = reader->get_next_chunk(ITEMS_PER_CHUNK, read_stream);
   int64_t base_tile_idx = 0;
-  std::optional<int64_t> first_offset;
-  std::optional<int64_t> last_offset;
-  if (byte_range.offset() == 0) { first_offset = 0; }
+  std::optional<byte_offset> first_row_offset;
+  std::optional<byte_offset> last_row_offset;
+  bool found_last_offset = false;
+  if (byte_range.offset() == 0) { first_row_offset = 0; }
   std::swap(read_stream, scan_stream);
 
   while (chunk->size() > 0) {
     // if we found the last delimiter, or didn't find delimiters inside the byte range at all: abort
-    if (last_offset.has_value() or
-        (not first_offset.has_value() and chunk_offset >= byte_range_end)) {
+    if (last_row_offset.has_value() or
+        (not first_row_offset.has_value() and chunk_offset >= byte_range_end)) {
       break;
     }
 
     auto tiles_in_launch =
       cudf::util::div_rounding_up_safe(chunk->size(), static_cast<std::size_t>(ITEMS_PER_TILE));
 
-    auto offset_output = offset_storage.next_output(scan_stream);
+    auto row_offsets = row_offset_storage.next_output(scan_stream);
 
     // reset the next chunk of tile state
     multibyte_split_init_kernel<<<tiles_in_launch,
@@ -690,12 +666,11 @@ std::unique_ptr<cudf::column> multibyte_split(cudf::io::text::data_chunk_source
                           scan_stream.value()>>>(  //
         base_tile_idx,
         chunk_offset,
-        offset_storage.size(),
+        row_offset_storage.size(),
         tile_offsets,
         delimiter[0],
         *chunk,
-        byte_range_end,
-        offset_output);
+        row_offsets);
     } else {
       multibyte_split_kernel<<<tiles_in_launch,
                                THREADS_PER_TILE,
@@ -703,34 +678,55 @@ std::unique_ptr<cudf::column> multibyte_split(cudf::io::text::data_chunk_source
                                scan_stream.value()>>>(  //
         base_tile_idx,
         chunk_offset,
-        offset_storage.size(),
+        row_offset_storage.size(),
         tile_multistates,
         tile_offsets,
         {device_delim.data(), static_cast<std::size_t>(device_delim.size())},
         *chunk,
-        byte_range_end,
-        offset_output);
+        row_offsets);
     }
 
     // load the next chunk
     auto next_chunk = reader->get_next_chunk(ITEMS_PER_CHUNK, read_stream);
     // while that is running, determine how many offsets we output (synchronizes)
-    auto next_tile_offset =
-      tile_offsets.get_inclusive_prefix(base_tile_idx + tiles_in_launch - 1, scan_stream);
-    offset_storage.advance_output(next_tile_offset.offset() - offset_storage.size());
+    auto const new_offsets = [&] {
+      auto const new_offsets_unclamped =
+        tile_offsets.get_inclusive_prefix(base_tile_idx + tiles_in_launch - 1, scan_stream) -
+        static_cast<output_offset>(row_offset_storage.size());
+      // if we are not in the last chunk, we can use all offsets
+      if (chunk_offset + static_cast<output_offset>(chunk->size()) < byte_range_end) {
+        return new_offsets_unclamped;
+      }
+      // if we are in the last chunk, we need to find the first out-of-bounds offset
+      auto const it = thrust::make_counting_iterator(output_offset{});
+      auto const end_loc =
+        *thrust::find_if(rmm::exec_policy_nosync(scan_stream),
+                         it,
+                         it + new_offsets_unclamped,
+                         [row_offsets, byte_range_end] __device__(output_offset i) {
+                           return row_offsets[i] >= byte_range_end;
+                         });
+      // if we had no out-of-bounds offset, we copy all offsets
+      if (end_loc == new_offsets_unclamped) { return end_loc; }
+      // otherwise we copy only up to (including) the first out-of-bounds delimiter
+      found_last_offset = true;
+      return end_loc + 1;
+    }();
+    row_offset_storage.advance_output(new_offsets);
     // determine if we found the first or last field offset for the byte range
-    if (next_tile_offset.offset() > 0 and not first_offset) {
-      first_offset = offset_storage.front_element(scan_stream);
+    if (new_offsets > 0 and not first_row_offset) {
+      first_row_offset = row_offset_storage.front_element(scan_stream);
     }
-    if (next_tile_offset.is_past_end()) { last_offset = offset_storage.back_element(scan_stream); }
+    if (found_last_offset) { last_row_offset = row_offset_storage.back_element(scan_stream); }
     // copy over the characters we need, if we already encountered the first field delimiter
-    if (first_offset.has_value()) {
-      auto const begin    = chunk->data() + std::max<int64_t>(0, *first_offset - chunk_offset);
-      auto const sentinel = last_offset.value_or(std::numeric_limits<int64_t>::max());
-      auto const end = chunk->data() + std::min<int64_t>(sentinel - chunk_offset, chunk->size());
+    if (first_row_offset.has_value()) {
+      auto const begin = chunk->data() + std::max<byte_offset>(0, *first_row_offset - chunk_offset);
+      auto const sentinel = last_row_offset.value_or(std::numeric_limits<byte_offset>::max());
+      auto const end =
+        chunk->data() + std::min<byte_offset>(sentinel - chunk_offset, chunk->size());
       auto const output_size = end - begin;
       auto char_output       = char_storage.next_output(scan_stream);
-      auto const split       = begin + std::min<int64_t>(output_size, char_output.head().size());
+      auto const split = begin + std::min<byte_offset>(output_size, char_output.head().size());
       thrust::copy(rmm::exec_policy_nosync(scan_stream), begin, split, char_output.head().begin());
       thrust::copy(rmm::exec_policy_nosync(scan_stream), split, end, char_output.tail().begin());
       char_storage.advance_output(output_size);
@@ -739,7 +735,7 @@ std::unique_ptr<cudf::column> multibyte_split(cudf::io::text::data_chunk_source
     cudaEventRecord(last_launch_event, scan_stream.value());
 
     std::swap(read_stream, scan_stream);
-    base_tile_idx += TILES_PER_CHUNK;
+    base_tile_idx += tiles_in_launch;
     chunk_offset += chunk->size();
     chunk = std::move(next_chunk);
   }
@@ -750,24 +746,27 @@ std::unique_ptr<cudf::column> multibyte_split(cudf::io::text::data_chunk_source
 
   // if the input was empty, we didn't find a delimiter at all,
   // or the first delimiter was also the last: empty output
-  if (chunk_offset == 0 or not first_offset.has_value() or first_offset == last_offset) {
+  if (chunk_offset == 0 or not first_row_offset.has_value() or
+      first_row_offset == last_row_offset) {
     return make_empty_column(type_id::STRING);
   }
 
   auto chars          = char_storage.gather(stream, mr);
-  auto global_offsets = offset_storage.gather(stream, mr);
+  auto global_offsets = row_offset_storage.gather(stream, mr);
 
-  bool const insert_begin = *first_offset == 0;
-  bool const insert_end   = not last_offset.has_value() or last_offset == chunk_offset;
+  bool const insert_begin = *first_row_offset == 0;
+  bool const insert_end   = not last_row_offset.has_value() or last_row_offset == chunk_offset;
   rmm::device_uvector<int32_t> offsets{
     global_offsets.size() + insert_begin + insert_end, stream, mr};
   if (insert_begin) { offsets.set_element_to_zero_async(0, stream); }
-  if (insert_end) { offsets.set_element(offsets.size() - 1, chunk_offset - *first_offset, stream); }
+  if (insert_end) {
+    offsets.set_element(offsets.size() - 1, chunk_offset - *first_row_offset, stream);
+  }
   thrust::transform(rmm::exec_policy(stream),
                     global_offsets.begin(),
                     global_offsets.end(),
                     offsets.begin() + insert_begin,
-                    [baseline = *first_offset] __device__(int64_t global_offset) {
+                    [baseline = *first_row_offset] __device__(byte_offset global_offset) {
                       return static_cast<int32_t>(global_offset - baseline);
                     });
 

From edc058f0e250e4fe6a1cd3829683c13b6a394373 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Mon, 17 Oct 2022 14:48:21 -0500
Subject: [PATCH 040/202] Add `nanosecond` & `microsecond` to
 `DatetimeProperties` (#11911)

This PR:

- [x] Implemented `extract_millisecond_fraction`, `extract_microsecond_fraction` and `extract_nanosecond_fraction` in libcudf.
- [x] Added `nanosecond` and `microsecond` in `DatetimeProperties` & `DatetimeIndex`.
- [x] Updated docs
- [x] Added & modified tests

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Matthew Roeschke (https://github.com/mroeschke)
  - Nghia Truong (https://github.com/ttnghia)
  - MithunR (https://github.com/mythrocks)
  - https://github.com/nvdbaranec
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/11911
---
 cpp/include/cudf/datetime.hpp               | 71 ++++++++++++++++---
 cpp/include/cudf/detail/datetime.hpp        | 33 +++++++++
 cpp/src/datetime/datetime_ops.cu            | 76 +++++++++++++++++++--
 cpp/tests/datetime/datetime_ops_test.cpp    | 54 +++++++++++++++
 docs/cudf/source/api_docs/index_objects.rst |  9 ++-
 docs/cudf/source/api_docs/series.rst        | 28 ++++----
 python/cudf/cudf/_lib/cpp/datetime.pxd      |  9 +++
 python/cudf/cudf/_lib/datetime.pyx          | 12 ++++
 python/cudf/cudf/core/index.py              | 50 ++++++++++++++
 python/cudf/cudf/core/series.py             | 61 ++++++++++++++++-
 python/cudf/cudf/tests/test_datetime.py     | 10 ++-
 11 files changed, 377 insertions(+), 36 deletions(-)

diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp
index a8955ffb17c..fb04336871f 100644
--- a/cpp/include/cudf/datetime.hpp
+++ b/cpp/include/cudf/datetime.hpp
@@ -36,7 +36,7 @@ namespace datetime {
  */
 
 /**
- * @brief  Extracts year from any date time type and returns an int16_t
+ * @brief  Extracts year from any datetime type and returns an int16_t
  * cudf::column.
  *
  * @param column cudf::column_view of the input datetime values
@@ -50,7 +50,7 @@ std::unique_ptr<cudf::column> extract_year(
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
- * @brief  Extracts month from any date time type and returns an int16_t
+ * @brief  Extracts month from any datetime type and returns an int16_t
  * cudf::column.
  *
  * @param column cudf::column_view of the input datetime values
@@ -64,7 +64,7 @@ std::unique_ptr<cudf::column> extract_month(
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
- * @brief  Extracts day from any date time type and returns an int16_t
+ * @brief  Extracts day from any datetime type and returns an int16_t
  * cudf::column.
  *
  * @param column cudf::column_view of the input datetime values
@@ -78,7 +78,7 @@ std::unique_ptr<cudf::column> extract_day(
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
- * @brief  Extracts day from any date time type and returns an int16_t
+ * @brief  Extracts weekday from any datetime type and returns an int16_t
  * cudf::column.
  *
  * @param column cudf::column_view of the input datetime values
@@ -92,7 +92,7 @@ std::unique_ptr<cudf::column> extract_weekday(
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
- * @brief  Extracts hour from any date time type and returns an int16_t
+ * @brief  Extracts hour from any datetime type and returns an int16_t
  * cudf::column.
  *
  * @param column cudf::column_view of the input datetime values
@@ -106,7 +106,7 @@ std::unique_ptr<cudf::column> extract_hour(
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
- * @brief  Extracts minute from any date time type and returns an int16_t
+ * @brief  Extracts minute from any datetime type and returns an int16_t
  * cudf::column.
  *
  * @param column cudf::column_view of the input datetime values
@@ -120,7 +120,7 @@ std::unique_ptr<cudf::column> extract_minute(
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
- * @brief  Extracts second from any date time type and returns an int16_t
+ * @brief  Extracts second from any datetime type and returns an int16_t
  * cudf::column.
  *
  * @param column cudf::column_view of the input datetime values
@@ -133,6 +133,57 @@ std::unique_ptr<cudf::column> extract_second(
   cudf::column_view const& column,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+/**
+ * @brief  Extracts millisecond fraction from any datetime type and returns an int16_t
+ * cudf::column.
+ *
+ * A millisecond fraction is only the 3 digits that make up the millisecond portion of a duration.
+ * For example, the millisecond fraction of 1.234567890 seconds is 234.
+ *
+ * @param column cudf::column_view of the input datetime values
+ * @param mr Device memory resource used to allocate device memory of the returned column
+ *
+ * @returns cudf::column of the extracted int16_t milliseconds
+ * @throw cudf::logic_error if input column datatype is not TIMESTAMP
+ */
+std::unique_ptr<cudf::column> extract_millisecond_fraction(
+  cudf::column_view const& column,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+/**
+ * @brief  Extracts microsecond fraction from any datetime type and returns an int16_t
+ * cudf::column.
+ *
+ * A microsecond fraction is only the 3 digits that make up the microsecond portion of a duration.
+ * For example, the microsecond fraction of 1.234567890 seconds is 567.
+ *
+ * @param column cudf::column_view of the input datetime values
+ * @param mr Device memory resource used to allocate device memory of the returned column
+ *
+ * @returns cudf::column of the extracted int16_t microseconds
+ * @throw cudf::logic_error if input column datatype is not TIMESTAMP
+ */
+std::unique_ptr<cudf::column> extract_microsecond_fraction(
+  cudf::column_view const& column,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+/**
+ * @brief  Extracts nanosecond fraction from any datetime type and returns an int16_t
+ * cudf::column.
+ *
+ * A nanosecond fraction is only the 3 digits that make up the nanosecond portion of a duration.
+ * For example, the nanosecond fraction of 1.234567890 seconds is 890.
+ *
+ * @param column cudf::column_view of the input datetime values
+ * @param mr Device memory resource used to allocate device memory of the returned column
+ *
+ * @returns cudf::column of the extracted int16_t nanoseconds
+ * @throw cudf::logic_error if input column datatype is not TIMESTAMP
+ */
+std::unique_ptr<cudf::column> extract_nanosecond_fraction(
+  cudf::column_view const& column,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 /** @} */  // end of group
 /**
  * @addtogroup datetime_compute
@@ -141,7 +192,7 @@ std::unique_ptr<cudf::column> extract_second(
  */
 
 /**
- * @brief  Computes the last day of the month in date time type and returns a TIMESTAMP_DAYS
+ * @brief  Computes the last day of the month in datetime type and returns a TIMESTAMP_DAYS
  * cudf::column.
  *
  * @param column cudf::column_view of the input datetime values
@@ -169,7 +220,7 @@ std::unique_ptr<cudf::column> day_of_year(
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
- * @brief  Adds or subtracts a number of months from the date time type and returns a
+ * @brief  Adds or subtracts a number of months from the datetime type and returns a
  * timestamp column that is of the same type as the input `timestamps` column.
  *
  * For a given row, if the `timestamps` or the `months` column value is null,
@@ -204,7 +255,7 @@ std::unique_ptr<cudf::column> add_calendrical_months(
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
- * @brief  Adds or subtracts a number of months from the date time type and returns a
+ * @brief  Adds or subtracts a number of months from the datetime type and returns a
  * timestamp column that is of the same type as the input `timestamps` column.
  *
  * For a given row, if the `timestamps` value is null, the output for that row is null.
diff --git a/cpp/include/cudf/detail/datetime.hpp b/cpp/include/cudf/detail/datetime.hpp
index 7a2545fbdcf..d17e641533e 100644
--- a/cpp/include/cudf/detail/datetime.hpp
+++ b/cpp/include/cudf/detail/datetime.hpp
@@ -94,6 +94,39 @@ std::unique_ptr<cudf::column> extract_second(
   rmm::cuda_stream_view stream        = cudf::default_stream_value,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+/**
+ * @copydoc cudf::extract_millisecond_fraction(cudf::column_view const&,
+ * rmm::mr::device_memory_resource *)
+ *
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ */
+std::unique_ptr<cudf::column> extract_millisecond_fraction(
+  cudf::column_view const& column,
+  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+/**
+ * @copydoc cudf::extract_microsecond_fraction(cudf::column_view const&,
+ * rmm::mr::device_memory_resource *)
+ *
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ */
+std::unique_ptr<cudf::column> extract_microsecond_fraction(
+  cudf::column_view const& column,
+  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+/**
+ * @copydoc cudf::extract_nanosecond_fraction(cudf::column_view const&,
+ * rmm::mr::device_memory_resource *)
+ *
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ */
+std::unique_ptr<cudf::column> extract_nanosecond_fraction(
+  cudf::column_view const& column,
+  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 /**
  * @copydoc cudf::last_day_of_month(cudf::column_view const&, rmm::mr::device_memory_resource *)
  *
diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu
index ee026d6c395..e89792525c9 100644
--- a/cpp/src/datetime/datetime_ops.cu
+++ b/cpp/src/datetime/datetime_ops.cu
@@ -76,9 +76,22 @@ struct extract_component_operator {
 
     if (time_since_midnight.count() < 0) { time_since_midnight += days(1); }
 
-    auto hrs_  = duration_cast<hours>(time_since_midnight);
-    auto mins_ = duration_cast<minutes>(time_since_midnight - hrs_);
-    auto secs_ = duration_cast<seconds>(time_since_midnight - hrs_ - mins_);
+    auto const hrs_  = [&] { return duration_cast<hours>(time_since_midnight); };
+    auto const mins_ = [&] { return duration_cast<minutes>(time_since_midnight) - hrs_(); };
+    auto const secs_ = [&] {
+      return duration_cast<seconds>(time_since_midnight) - hrs_() - mins_();
+    };
+    auto const millisecs_ = [&] {
+      return duration_cast<milliseconds>(time_since_midnight) - hrs_() - mins_() - secs_();
+    };
+    auto const microsecs_ = [&] {
+      return duration_cast<microseconds>(time_since_midnight) - hrs_() - mins_() - secs_() -
+             millisecs_();
+    };
+    auto const nanosecs_ = [&] {
+      return duration_cast<nanoseconds>(time_since_midnight) - hrs_() - mins_() - secs_() -
+             millisecs_() - microsecs_();
+    };
 
     switch (Component) {
       case datetime_component::YEAR:
@@ -89,9 +102,12 @@ struct extract_component_operator {
         return static_cast<unsigned>(year_month_day(days_since_epoch).day());
       case datetime_component::WEEKDAY:
         return year_month_weekday(days_since_epoch).weekday().iso_encoding();
-      case datetime_component::HOUR: return hrs_.count();
-      case datetime_component::MINUTE: return mins_.count();
-      case datetime_component::SECOND: return secs_.count();
+      case datetime_component::HOUR: return hrs_().count();
+      case datetime_component::MINUTE: return mins_().count();
+      case datetime_component::SECOND: return secs_().count();
+      case datetime_component::MILLISECOND: return millisecs_().count();
+      case datetime_component::MICROSECOND: return microsecs_().count();
+      case datetime_component::NANOSECOND: return nanosecs_().count();
       default: return 0;
     }
   }
@@ -495,6 +511,33 @@ std::unique_ptr<column> extract_second(column_view const& column,
     cudf::type_id::INT16>(column, stream, mr);
 }
 
+std::unique_ptr<column> extract_millisecond_fraction(column_view const& column,
+                                                     rmm::cuda_stream_view stream,
+                                                     rmm::mr::device_memory_resource* mr)
+{
+  return detail::apply_datetime_op<
+    detail::extract_component_operator<detail::datetime_component::MILLISECOND>,
+    cudf::type_id::INT16>(column, stream, mr);
+}
+
+std::unique_ptr<column> extract_microsecond_fraction(column_view const& column,
+                                                     rmm::cuda_stream_view stream,
+                                                     rmm::mr::device_memory_resource* mr)
+{
+  return detail::apply_datetime_op<
+    detail::extract_component_operator<detail::datetime_component::MICROSECOND>,
+    cudf::type_id::INT16>(column, stream, mr);
+}
+
+std::unique_ptr<column> extract_nanosecond_fraction(column_view const& column,
+                                                    rmm::cuda_stream_view stream,
+                                                    rmm::mr::device_memory_resource* mr)
+{
+  return detail::apply_datetime_op<
+    detail::extract_component_operator<detail::datetime_component::NANOSECOND>,
+    cudf::type_id::INT16>(column, stream, mr);
+}
+
 std::unique_ptr<column> last_day_of_month(column_view const& column,
                                           rmm::cuda_stream_view stream,
                                           rmm::mr::device_memory_resource* mr)
@@ -607,6 +650,27 @@ std::unique_ptr<column> extract_second(column_view const& column,
   return detail::extract_second(column, cudf::default_stream_value, mr);
 }
 
+std::unique_ptr<column> extract_millisecond_fraction(column_view const& column,
+                                                     rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::extract_millisecond_fraction(column, cudf::default_stream_value, mr);
+}
+
+std::unique_ptr<column> extract_microsecond_fraction(column_view const& column,
+                                                     rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::extract_microsecond_fraction(column, cudf::default_stream_value, mr);
+}
+
+std::unique_ptr<column> extract_nanosecond_fraction(column_view const& column,
+                                                    rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::extract_nanosecond_fraction(column, cudf::default_stream_value, mr);
+}
+
 std::unique_ptr<column> last_day_of_month(column_view const& column,
                                           rmm::mr::device_memory_resource* mr)
 {
diff --git a/cpp/tests/datetime/datetime_ops_test.cpp b/cpp/tests/datetime/datetime_ops_test.cpp
index 2898a649e36..c6d36b2aa6e 100644
--- a/cpp/tests/datetime/datetime_ops_test.cpp
+++ b/cpp/tests/datetime/datetime_ops_test.cpp
@@ -60,6 +60,9 @@ TYPED_TEST(NonTimestampTest, TestThrowsOnNonTimestamp)
   EXPECT_THROW(extract_hour(col), cudf::logic_error);
   EXPECT_THROW(extract_minute(col), cudf::logic_error);
   EXPECT_THROW(extract_second(col), cudf::logic_error);
+  EXPECT_THROW(extract_millisecond_fraction(col), cudf::logic_error);
+  EXPECT_THROW(extract_microsecond_fraction(col), cudf::logic_error);
+  EXPECT_THROW(extract_nanosecond_fraction(col), cudf::logic_error);
   EXPECT_THROW(last_day_of_month(col), cudf::logic_error);
   EXPECT_THROW(day_of_year(col), cudf::logic_error);
   EXPECT_THROW(add_calendrical_months(
@@ -97,12 +100,21 @@ TEST_F(BasicDatetimeOpsTest, TestExtractingDatetimeComponents)
       1674631932929   // 2023-01-25 07:32:12.929 GMT
     };
 
+  auto timestamps_ns =
+    cudf::test::fixed_width_column_wrapper<cudf::timestamp_ns, cudf::timestamp_ns::rep>{
+      -23324234,  // 1969-12-31 23:59:59.976675766 GMT
+      23432424,   // 1970-01-01 00:00:00.023432424 GMT
+      987234623   // 1970-01-01 00:00:00.987234623 GMT
+    };
+
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_year(timestamps_D),
                                  fixed_width_column_wrapper<int16_t>{1965, 2018, 2023});
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_year(timestamps_s),
                                  fixed_width_column_wrapper<int16_t>{1965, 2018, 2023});
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_year(timestamps_ms),
                                  fixed_width_column_wrapper<int16_t>{1965, 2018, 2023});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_year(timestamps_ns),
+                                 fixed_width_column_wrapper<int16_t>{1969, 1970, 1970});
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_month(timestamps_D),
                                  fixed_width_column_wrapper<int16_t>{10, 7, 1});
@@ -110,6 +122,8 @@ TEST_F(BasicDatetimeOpsTest, TestExtractingDatetimeComponents)
                                  fixed_width_column_wrapper<int16_t>{10, 7, 1});
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_month(timestamps_ms),
                                  fixed_width_column_wrapper<int16_t>{10, 7, 1});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_month(timestamps_ns),
+                                 fixed_width_column_wrapper<int16_t>{12, 1, 1});
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_day(timestamps_D),
                                  fixed_width_column_wrapper<int16_t>{26, 4, 25});
@@ -117,6 +131,8 @@ TEST_F(BasicDatetimeOpsTest, TestExtractingDatetimeComponents)
                                  fixed_width_column_wrapper<int16_t>{26, 4, 25});
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_day(timestamps_ms),
                                  fixed_width_column_wrapper<int16_t>{26, 4, 25});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_day(timestamps_ns),
+                                 fixed_width_column_wrapper<int16_t>{31, 1, 1});
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_weekday(timestamps_D),
                                  fixed_width_column_wrapper<int16_t>{2, 3, 3});
@@ -124,6 +140,8 @@ TEST_F(BasicDatetimeOpsTest, TestExtractingDatetimeComponents)
                                  fixed_width_column_wrapper<int16_t>{2, 3, 3});
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_weekday(timestamps_ms),
                                  fixed_width_column_wrapper<int16_t>{2, 3, 3});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_weekday(timestamps_ns),
+                                 fixed_width_column_wrapper<int16_t>{3, 4, 4});  // Wed, Thu, Thu
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_hour(timestamps_D),
                                  fixed_width_column_wrapper<int16_t>{0, 0, 0});
@@ -131,6 +149,8 @@ TEST_F(BasicDatetimeOpsTest, TestExtractingDatetimeComponents)
                                  fixed_width_column_wrapper<int16_t>{14, 12, 7});
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_hour(timestamps_ms),
                                  fixed_width_column_wrapper<int16_t>{14, 12, 7});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_hour(timestamps_ns),
+                                 fixed_width_column_wrapper<int16_t>{23, 0, 0});
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_minute(timestamps_D),
                                  fixed_width_column_wrapper<int16_t>{0, 0, 0});
@@ -138,6 +158,8 @@ TEST_F(BasicDatetimeOpsTest, TestExtractingDatetimeComponents)
                                  fixed_width_column_wrapper<int16_t>{1, 0, 32});
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_minute(timestamps_ms),
                                  fixed_width_column_wrapper<int16_t>{1, 0, 32});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_minute(timestamps_ns),
+                                 fixed_width_column_wrapper<int16_t>{59, 0, 0});
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_second(timestamps_D),
                                  fixed_width_column_wrapper<int16_t>{0, 0, 0});
@@ -145,6 +167,35 @@ TEST_F(BasicDatetimeOpsTest, TestExtractingDatetimeComponents)
                                  fixed_width_column_wrapper<int16_t>{12, 0, 12});
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_second(timestamps_ms),
                                  fixed_width_column_wrapper<int16_t>{12, 0, 12});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_second(timestamps_ns),
+                                 fixed_width_column_wrapper<int16_t>{59, 0, 0});
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_millisecond_fraction(timestamps_D),
+                                 fixed_width_column_wrapper<int16_t>{0, 0, 0});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_millisecond_fraction(timestamps_s),
+                                 fixed_width_column_wrapper<int16_t>{0, 0, 0});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_millisecond_fraction(timestamps_ms),
+                                 fixed_width_column_wrapper<int16_t>{762, 0, 929});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_millisecond_fraction(timestamps_ns),
+                                 fixed_width_column_wrapper<int16_t>{976, 23, 987});
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_microsecond_fraction(timestamps_D),
+                                 fixed_width_column_wrapper<int16_t>{0, 0, 0});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_microsecond_fraction(timestamps_s),
+                                 fixed_width_column_wrapper<int16_t>{0, 0, 0});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_microsecond_fraction(timestamps_ms),
+                                 fixed_width_column_wrapper<int16_t>{0, 0, 0});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_microsecond_fraction(timestamps_ns),
+                                 fixed_width_column_wrapper<int16_t>{675, 432, 234});
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_nanosecond_fraction(timestamps_D),
+                                 fixed_width_column_wrapper<int16_t>{0, 0, 0});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_nanosecond_fraction(timestamps_s),
+                                 fixed_width_column_wrapper<int16_t>{0, 0, 0});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_nanosecond_fraction(timestamps_ms),
+                                 fixed_width_column_wrapper<int16_t>{0, 0, 0});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_nanosecond_fraction(timestamps_ns),
+                                 fixed_width_column_wrapper<int16_t>{766, 424, 623});
 }
 
 template <typename T>
@@ -175,6 +226,9 @@ TYPED_TEST(TypedDatetimeOpsTest, TestEmptyColumns)
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_hour(timestamps), int16s);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_minute(timestamps), int16s);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_second(timestamps), int16s);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_millisecond_fraction(timestamps), int16s);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_microsecond_fraction(timestamps), int16s);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_nanosecond_fraction(timestamps), int16s);
 }
 
 TYPED_TEST(TypedDatetimeOpsTest, TestExtractingGeneratedDatetimeComponents)
diff --git a/docs/cudf/source/api_docs/index_objects.rst b/docs/cudf/source/api_docs/index_objects.rst
index 8e0e3bbd411..6edd15e7176 100644
--- a/docs/cudf/source/api_docs/index_objects.rst
+++ b/docs/cudf/source/api_docs/index_objects.rst
@@ -262,12 +262,15 @@ Time/date components
    DatetimeIndex.hour
    DatetimeIndex.minute
    DatetimeIndex.second
-   DatetimeIndex.dayofweek
-   DatetimeIndex.dayofyear
+   DatetimeIndex.microsecond
+   DatetimeIndex.nanosecond
    DatetimeIndex.day_of_year
+   DatetimeIndex.dayofyear
+   DatetimeIndex.dayofweek
    DatetimeIndex.weekday
-   DatetimeIndex.is_leap_year
    DatetimeIndex.quarter
+   DatetimeIndex.is_leap_year
+   
    DatetimeIndex.isocalendar
 
 Time-specific operations
diff --git a/docs/cudf/source/api_docs/series.rst b/docs/cudf/source/api_docs/series.rst
index 53042041f6d..842319338b3 100644
--- a/docs/cudf/source/api_docs/series.rst
+++ b/docs/cudf/source/api_docs/series.rst
@@ -260,25 +260,27 @@ Datetime properties
 .. autosummary::
    :toctree: api/
 
+   year
+   month
    day
-   dayofweek
-   dayofyear
-   days_in_month
-   day_of_year
    hour
    minute
-   month
    second
+   microsecond
+   nanosecond
+   dayofweek
    weekday
-   year
-   is_leap_year
+   dayofyear
+   day_of_year
+   quarter
    is_month_start
    is_month_end
    is_quarter_start
    is_quarter_end
    is_year_start
    is_year_end
-   quarter
+   is_leap_year
+   days_in_month
 
 Datetime methods
 ^^^^^^^^^^^^^^^^
@@ -286,11 +288,11 @@ Datetime methods
 .. autosummary::
    :toctree: api/
 
-   strftime
    isocalendar
-   ceil
-   floor
+   strftime
    round
+   floor
+   ceil
 
 
 Timedelta properties
@@ -300,11 +302,11 @@ Timedelta properties
 .. autosummary::
    :toctree: api/
 
-   components
    days
+   seconds
    microseconds
    nanoseconds
-   seconds
+   components
 
 .. _api.series.str:
 .. include:: string_handling.rst
diff --git a/python/cudf/cudf/_lib/cpp/datetime.pxd b/python/cudf/cudf/_lib/cpp/datetime.pxd
index 74addb87357..d03587745e1 100644
--- a/python/cudf/cudf/_lib/cpp/datetime.pxd
+++ b/python/cudf/cudf/_lib/cpp/datetime.pxd
@@ -15,6 +15,15 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil:
     cdef unique_ptr[column] extract_hour(const column_view& column) except +
     cdef unique_ptr[column] extract_minute(const column_view& column) except +
     cdef unique_ptr[column] extract_second(const column_view& column) except +
+    cdef unique_ptr[column] extract_millisecond_fraction(
+        const column_view& column
+    ) except +
+    cdef unique_ptr[column] extract_microsecond_fraction(
+        const column_view& column
+    ) except +
+    cdef unique_ptr[column] extract_nanosecond_fraction(
+        const column_view& column
+    ) except +
 
     ctypedef enum rounding_frequency "cudf::datetime::rounding_frequency":
         DAY "cudf::datetime::rounding_frequency::DAY"
diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx
index e218400a2db..cb0a245b915 100644
--- a/python/cudf/cudf/_lib/datetime.pyx
+++ b/python/cudf/cudf/_lib/datetime.pyx
@@ -49,6 +49,18 @@ def extract_datetime_component(Column col, object field):
             c_result = move(libcudf_datetime.extract_minute(col_view))
         elif field == "second":
             c_result = move(libcudf_datetime.extract_second(col_view))
+        elif field == "millisecond":
+            c_result = move(
+                libcudf_datetime.extract_millisecond_fraction(col_view)
+            )
+        elif field == "microsecond":
+            c_result = move(
+                libcudf_datetime.extract_microsecond_fraction(col_view)
+            )
+        elif field == "nanosecond":
+            c_result = move(
+                libcudf_datetime.extract_nanosecond_fraction(col_view)
+            )
         elif field == "day_of_year":
             c_result = move(libcudf_datetime.day_of_year(col_view))
         else:
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 5b101f74664..0628497fc29 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -2051,6 +2051,56 @@ def second(self):
         """
         return self._get_dt_field("second")
 
+    @property  # type: ignore
+    @_cudf_nvtx_annotate
+    def microsecond(self):
+        """
+        The microseconds of the datetime.
+
+        Examples
+        --------
+        >>> import pandas as pd
+        >>> import cudf
+        >>> datetime_index = cudf.Index(pd.date_range("2000-01-01",
+        ...             periods=3, freq="us"))
+        >>> datetime_index
+        DatetimeIndex([       '2000-01-01 00:00:00', '2000-01-01 00:00:00.000001',
+               '2000-01-01 00:00:00.000002'],
+              dtype='datetime64[ns]')
+        >>> datetime_index.microsecond
+        Int32Index([0, 1, 2], dtype='int32')
+        """  # noqa: E501
+        return as_index(
+            (
+                self._values.get_dt_field("millisecond")
+                * cudf.Scalar(1000, dtype="int32")
+            )
+            + self._values.get_dt_field("microsecond"),
+            name=self.name,
+        )
+
+    @property  # type: ignore
+    @_cudf_nvtx_annotate
+    def nanosecond(self):
+        """
+        The nanoseconds of the datetime.
+
+        Examples
+        --------
+        >>> import pandas as pd
+        >>> import cudf
+        >>> datetime_index = cudf.Index(pd.date_range("2000-01-01",
+        ...             periods=3, freq="ns"))
+        >>> datetime_index
+        DatetimeIndex([          '2000-01-01 00:00:00',
+                       '2000-01-01 00:00:00.000000001',
+                       '2000-01-01 00:00:00.000000002'],
+                      dtype='datetime64[ns]')
+        >>> datetime_index.nanosecond
+        Int16Index([0, 1, 2], dtype='int16')
+        """
+        return self._get_dt_field("nanosecond")
+
     @property  # type: ignore
     @_cudf_nvtx_annotate
     def weekday(self):
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index f11052096e3..7493202a3d1 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -1206,7 +1206,8 @@ def __repr__(self):
             and not is_decimal_dtype(preprocess.dtype)
             and not is_struct_dtype(preprocess.dtype)
         ) or isinstance(
-            preprocess._column, cudf.core.column.timedelta.TimeDeltaColumn
+            preprocess._column,
+            cudf.core.column.timedelta.TimeDeltaColumn,
         ):
             output = repr(
                 preprocess.astype("O").fillna(cudf._NA_REP).to_pandas()
@@ -3591,6 +3592,64 @@ def second(self):
         """
         return self._get_dt_field("second")
 
+    @property  # type: ignore
+    @_cudf_nvtx_annotate
+    def microsecond(self):
+        """
+        The microseconds of the datetime.
+
+        Examples
+        --------
+        >>> import pandas as pd
+        >>> import cudf
+        >>> datetime_series = cudf.Series(pd.date_range("2000-01-01",
+        ...         periods=3, freq="us"))
+        >>> datetime_series
+        0    2000-01-01 00:00:00.000000
+        1    2000-01-01 00:00:00.000001
+        2    2000-01-01 00:00:00.000002
+        dtype: datetime64[ns]
+        >>> datetime_series.dt.microsecond
+        0    0
+        1    1
+        2    2
+        dtype: int32
+        """
+        return Series(
+            data=(
+                self.series._column.get_dt_field("millisecond")
+                * cudf.Scalar(1000, dtype="int32")
+            )
+            + self.series._column.get_dt_field("microsecond"),
+            index=self.series._index,
+            name=self.series.name,
+        )
+
+    @property  # type: ignore
+    @_cudf_nvtx_annotate
+    def nanosecond(self):
+        """
+        The nanoseconds of the datetime.
+
+        Examples
+        --------
+        >>> import pandas as pd
+        >>> import cudf
+        >>> datetime_series = cudf.Series(pd.date_range("2000-01-01",
+        ...         periods=3, freq="ns"))
+        >>> datetime_series
+        0    2000-01-01 00:00:00.000000000
+        1    2000-01-01 00:00:00.000000001
+        2    2000-01-01 00:00:00.000000002
+        dtype: datetime64[ns]
+        >>> datetime_series.dt.nanosecond
+        0    0
+        1    1
+        2    2
+        dtype: int16
+        """
+        return self._get_dt_field("nanosecond")
+
     @property  # type: ignore
     @_cudf_nvtx_annotate
     def weekday(self):
diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py
index 800a8aeeab5..bd3b3561701 100644
--- a/python/cudf/cudf/tests/test_datetime.py
+++ b/python/cudf/cudf/tests/test_datetime.py
@@ -28,7 +28,9 @@ def data1():
 
 
 def data2():
-    return pd.date_range("20010101", "20020215", freq="400h", name="times")
+    return pd.date_range(
+        "20010101", freq="243434324423423234N", name="times", periods=10
+    )
 
 
 def timeseries_us_data():
@@ -81,6 +83,8 @@ def numerical_data():
     "hour",
     "minute",
     "second",
+    "microsecond",
+    "nanosecond",
     "weekday",
     "dayofweek",
     "dayofyear",
@@ -172,7 +176,7 @@ def test_dt_ops(data):
 
 
 # libcudf doesn't respect timezones
-@pytest.mark.parametrize("data", [data1()])
+@pytest.mark.parametrize("data", [data1(), data2()])
 @pytest.mark.parametrize("field", fields)
 def test_dt_series(data, field):
     pd_data = pd.Series(data.copy())
@@ -182,7 +186,7 @@ def test_dt_series(data, field):
     assert_eq(base, test)
 
 
-@pytest.mark.parametrize("data", [data1()])
+@pytest.mark.parametrize("data", [data1(), data2()])
 @pytest.mark.parametrize("field", fields)
 def test_dt_index(data, field):
     pd_data = data.copy()

From afa16b433f0d468d3ee933d93c7945a7078cad52 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Mon, 17 Oct 2022 20:24:49 -0500
Subject: [PATCH 041/202] Fix documentation referring to removed as_gpu_matrix
 method. (#11937)

This fixes outdated documentation that refers to the `as_gpu_matrix` method, which was removed.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/11937
---
 docs/cudf/source/user_guide/cupy-interop.ipynb | 2 +-
 python/cudf/cudf/tests/test_dataframe.py       | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/docs/cudf/source/user_guide/cupy-interop.ipynb b/docs/cudf/source/user_guide/cupy-interop.ipynb
index 9fbac3b2578..47c6ba408fb 100644
--- a/docs/cudf/source/user_guide/cupy-interop.ipynb
+++ b/docs/cudf/source/user_guide/cupy-interop.ipynb
@@ -42,7 +42,7 @@
     "\n",
     "2. We can also use `DataFrame.values`.\n",
     "\n",
-    "3. We can also convert via the [CUDA array interface](https://numba.pydata.org/numba-doc/dev/cuda/cuda_array_interface.html) by using cuDF's `as_gpu_matrix` and CuPy's `asarray` functionality."
+    "3. We can also convert via the [CUDA array interface](https://numba.pydata.org/numba-doc/dev/cuda/cuda_array_interface.html) by using cuDF's `to_cupy` functionality."
    ]
   },
   {
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index d9e9a4dbba1..1fcfbe5fc91 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -4286,26 +4286,26 @@ def test_series_values_property(data):
         pytest.param(
             {"A": [1, None, 3], "B": [1, 2, None]},
             marks=pytest.mark.xfail(
-                reason="Nulls not supported by as_gpu_matrix"
+                reason="Nulls not supported by values accessor"
             ),
         ),
         pytest.param(
             {"A": [None, None, None], "B": [None, None, None]},
             marks=pytest.mark.xfail(
-                reason="Nulls not supported by as_gpu_matrix"
+                reason="Nulls not supported by values accessor"
             ),
         ),
         {"A": [], "B": []},
         pytest.param(
             {"A": [1, 2, 3], "B": ["a", "b", "c"]},
             marks=pytest.mark.xfail(
-                reason="str or categorical not supported by as_gpu_matrix"
+                reason="str or categorical not supported by values accessor"
             ),
         ),
         pytest.param(
             {"A": pd.Categorical(["a", "b", "c"]), "B": ["d", "e", "f"]},
             marks=pytest.mark.xfail(
-                reason="str or categorical not supported by as_gpu_matrix"
+                reason="str or categorical not supported by values accessor"
             ),
         ),
     ],

From a926c52d58c08657f8d437210ce31fddeaa868e7 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Mon, 17 Oct 2022 21:12:11 -0500
Subject: [PATCH 042/202] Add `.str.find_multiple` API (#11928)

Resolves: https://github.com/rapidsai/cudf/issues/10126

This PR adds `.str.find_multiple` API.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Matthew Roeschke (https://github.com/mroeschke)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/11928
---
 docs/cudf/source/api_docs/string_handling.rst |  1 +
 python/cudf/cudf/_lib/strings/__init__.py     |  1 +
 python/cudf/cudf/core/column/string.py        | 64 +++++++++++++++++++
 python/cudf/cudf/tests/test_string.py         | 61 ++++++++++++++++++
 4 files changed, 127 insertions(+)

diff --git a/docs/cudf/source/api_docs/string_handling.rst b/docs/cudf/source/api_docs/string_handling.rst
index 1496d68db6f..2285bb8fb7a 100644
--- a/docs/cudf/source/api_docs/string_handling.rst
+++ b/docs/cudf/source/api_docs/string_handling.rst
@@ -28,6 +28,7 @@ strings and apply several methods to it. These can be accessed like
    filter_tokens
    find
    findall
+   find_multiple
    get
    get_json_object
    hex_to_int
diff --git a/python/cudf/cudf/_lib/strings/__init__.py b/python/cudf/cudf/_lib/strings/__init__.py
index ff558a06d87..22a5066a20e 100644
--- a/python/cudf/cudf/_lib/strings/__init__.py
+++ b/python/cudf/cudf/_lib/strings/__init__.py
@@ -61,6 +61,7 @@
     startswith,
     startswith_multiple,
 )
+from cudf._lib.strings.find_multiple import find_multiple
 from cudf._lib.strings.findall import findall
 from cudf._lib.strings.json import GetJsonObjectOptions, get_json_object
 from cudf._lib.strings.padding import (
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index 287e68531f8..c84e4ff4adb 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -3623,6 +3623,70 @@ def findall(self, pat: str, flags: int = 0) -> SeriesOrIndex:
         data = libstrings.findall(self._column, pat, flags)
         return self._return_or_inplace(data)
 
+    def find_multiple(self, patterns: SeriesOrIndex) -> "cudf.Series":
+        """
+        Find all first occurrences of patterns in the Series/Index.
+
+        Parameters
+        ----------
+        patterns : array-like, Sequence or Series
+            Patterns to search for in the given Series/Index.
+
+        Returns
+        -------
+        Series
+            A Series with a list of indices of each pattern's first occurrence.
+            If a pattern is not found, -1 is returned for that index.
+
+        Examples
+        --------
+        >>> import cudf
+        >>> s = cudf.Series(["strings", "to", "search", "in"])
+        >>> s
+        0    strings
+        1         to
+        2     search
+        3         in
+        dtype: object
+        >>> t = cudf.Series(["a", "string", "g", "inn", "o", "r", "sea"])
+        >>> t
+        0         a
+        1    string
+        2         g
+        3       inn
+        4         o
+        5         r
+        6       sea
+        dtype: object
+        >>> s.str.find_multiple(t)
+        0       [-1, 0, 5, -1, -1, 2, -1]
+        1     [-1, -1, -1, -1, 1, -1, -1]
+        2       [2, -1, -1, -1, -1, 3, 0]
+        3    [-1, -1, -1, -1, -1, -1, -1]
+        dtype: list
+        """
+        if can_convert_to_column(patterns):
+            patterns_column = column.as_column(patterns)
+        else:
+            raise TypeError(
+                "patterns should be an array-like or a Series object, "
+                f"found {type(patterns)}"
+            )
+
+        if not isinstance(patterns_column, StringColumn):
+            raise TypeError(
+                "patterns can only be of 'string' dtype, "
+                f"got: {patterns_column.dtype}"
+            )
+
+        return cudf.Series(
+            libstrings.find_multiple(self._column, patterns_column),
+            index=self._parent.index
+            if isinstance(self._parent, cudf.Series)
+            else self._parent,
+            name=self._parent.name,
+        )
+
     def isempty(self) -> SeriesOrIndex:
         """
         Check whether each string is an empty string.
diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py
index 74d602c2cf1..2a43adf5a5c 100644
--- a/python/cudf/cudf/tests/test_string.py
+++ b/python/cudf/cudf/tests/test_string.py
@@ -3423,3 +3423,64 @@ def test_str_join_lists(sr, sep, string_na_rep, sep_na_rep, expected):
         sep=sep, string_na_rep=string_na_rep, sep_na_rep=sep_na_rep
     )
     assert_eq(actual, expected)
+
+
+@pytest.mark.parametrize(
+    "patterns, expected",
+    [
+        (
+            lambda: ["a", "s", "g", "i", "o", "r"],
+            [
+                [-1, 0, 5, 3, -1, 2],
+                [-1, -1, -1, -1, 1, -1],
+                [2, 0, -1, -1, -1, 3],
+                [-1, -1, -1, 0, -1, -1],
+            ],
+        ),
+        (
+            lambda: cudf.Series(["a", "string", "g", "inn", "o", "r", "sea"]),
+            [
+                [-1, 0, 5, -1, -1, 2, -1],
+                [-1, -1, -1, -1, 1, -1, -1],
+                [2, -1, -1, -1, -1, 3, 0],
+                [-1, -1, -1, -1, -1, -1, -1],
+            ],
+        ),
+    ],
+)
+def test_str_find_multiple(patterns, expected):
+    s = cudf.Series(["strings", "to", "search", "in"])
+    t = patterns()
+
+    expected = cudf.Series(expected)
+
+    # We convert to pandas because find_multiple returns ListDtype(int32)
+    # and expected is ListDtype(int64).
+    # Currently there is no easy way to type-cast these to match.
+    assert_eq(s.str.find_multiple(t).to_pandas(), expected.to_pandas())
+
+    s = cudf.Index(s)
+    t = cudf.Index(t)
+
+    expected.index = s
+
+    assert_eq(s.str.find_multiple(t).to_pandas(), expected.to_pandas())
+
+
+def test_str_find_multiple_error():
+    s = cudf.Series(["strings", "to", "search", "in"])
+    with pytest.raises(
+        TypeError,
+        match=re.escape(
+            "patterns should be an array-like or a Series object, found "
+            "<class 'str'>"
+        ),
+    ):
+        s.str.find_multiple("a")
+
+    t = cudf.Series([1, 2, 3])
+    with pytest.raises(
+        TypeError,
+        match=re.escape("patterns can only be of 'string' dtype, got: int64"),
+    ):
+        s.str.find_multiple(t)

From cea10cabd21732fd6334ca9a9956b99acfbf32ec Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Tue, 18 Oct 2022 12:07:26 -0500
Subject: [PATCH 043/202] Pin mimesis version in setup.py. (#11906)

The dependency pinning for `mimesis` in cudf's `setup.py` didn't match the conda environment. It was missing a pinning to `<4.1` from #8745. However, based on the conversation in #8551, this pinning of `<4.1` was only chosen because 4.1.0 wasn't yet available on conda-forge. Since the current version of mimesis is now 6.1.1, this PR updates the mimesis pinning to `>=4.1` and uses `generate_string` instead of `schoice`. I tested this locally with mimesis 6.1.1 and mimesis 4.1.0 and both passed tests.

Merge this PR concurrently with https://github.com/rapidsai/integration/pull/547.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - AJ Schmidt (https://github.com/ajschmidt8)

URL: https://github.com/rapidsai/cudf/pull/11906
---
 conda/environments/cudf_dev_cuda11.5.yml      | 2 +-
 python/cudf/cudf/testing/dataset_generator.py | 4 ++--
 python/cudf/setup.py                          | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/conda/environments/cudf_dev_cuda11.5.yml b/conda/environments/cudf_dev_cuda11.5.yml
index 142d3c7d9cb..e27e8557c80 100644
--- a/conda/environments/cudf_dev_cuda11.5.yml
+++ b/conda/environments/cudf_dev_cuda11.5.yml
@@ -63,7 +63,7 @@ dependencies:
   - myst-nb
   - scipy
   - dask-cuda=22.12.*
-  - mimesis<4.1
+  - mimesis>=4.1.0
   - packaging
   - protobuf>=3.20.1,<3.21.0a0
   - nvtx>=0.2.1
diff --git a/python/cudf/cudf/testing/dataset_generator.py b/python/cudf/cudf/testing/dataset_generator.py
index 4d24e7ff2a2..2867c4d10eb 100644
--- a/python/cudf/cudf/testing/dataset_generator.py
+++ b/python/cudf/cudf/testing/dataset_generator.py
@@ -502,7 +502,7 @@ def rand_dataframe(
                         cardinality=cardinality,
                         null_frequency=null_frequency,
                         generator=lambda cardinality=cardinality: [
-                            mimesis.random.random.schoice(
+                            mimesis.random.random.generate_string(
                                 string.printable,
                                 np.random.randint(
                                     low=0,
@@ -684,7 +684,7 @@ def get_values_for_nested_data(dtype, lists_max_length=None, size=None):
         values = float_generator(dtype=dtype, size=cardinality)()
     elif dtype.kind in ("U", "O"):
         values = [
-            mimesis.random.random.schoice(
+            mimesis.random.random.generate_string(
                 string.printable,
                 100,
             )
diff --git a/python/cudf/setup.py b/python/cudf/setup.py
index 93948afc0f6..3ebb66cb0ad 100644
--- a/python/cudf/setup.py
+++ b/python/cudf/setup.py
@@ -31,7 +31,7 @@
         "pytest-benchmark",
         "pytest-xdist",
         "hypothesis",
-        "mimesis",
+        "mimesis>=4.1.0",
         "fastavro>=0.22.9",
         "python-snappy>=0.6.0",
         "pyorc",

From 1effe19ab6384b229fc7e58c8109bbf279e3ac61 Mon Sep 17 00:00:00 2001
From: Mike Wilson <hyperbolic2346@users.noreply.github.com>
Date: Tue, 18 Oct 2022 13:09:19 -0400
Subject: [PATCH 044/202] Removing int8 column option from parquet byte_array
 writing (#11539)

As suggested in #11526 and captured in issue #11536 the usage of both INT8 and UINT8 as supported types for byte_arrays is unnecessary and adds complexity to the code. This change removes INT8 as an option and only allows UINT8 columns to be written out as byte_arrays. ~~This matches with cudf string columns which contain an INT8 column for data.~~

closes #11536

Authors:
  - Mike Wilson (https://github.com/hyperbolic2346)

Approvers:
  - Tobias Ribizel (https://github.com/upsj)
  - Nghia Truong (https://github.com/ttnghia)
  - David Wendt (https://github.com/davidwendt)
  - MithunR (https://github.com/mythrocks)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/11539
---
 cpp/src/io/parquet/parquet_gpu.hpp            |  5 +--
 cpp/src/io/parquet/writer_impl.cu             |  2 +-
 cpp/src/io/utilities/column_buffer.cpp        | 21 ++++++++---
 cpp/src/io/utilities/column_utils.cuh         |  4 +--
 cpp/src/lists/dremel.cu                       |  3 +-
 cpp/src/reshape/byte_cast.cu                  | 16 +++++++--
 cpp/tests/io/parquet_test.cpp                 | 36 +++++++++----------
 cpp/tests/reshape/byte_cast_tests.cpp         |  4 +--
 .../java/ai/rapids/cudf/ColumnVectorTest.java |  2 +-
 .../test/java/ai/rapids/cudf/TableTest.java   |  2 +-
 10 files changed, 57 insertions(+), 38 deletions(-)

diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp
index 1a8c0f4cd9e..38a0d70b0f8 100644
--- a/cpp/src/io/parquet/parquet_gpu.hpp
+++ b/cpp/src/io/parquet/parquet_gpu.hpp
@@ -321,10 +321,7 @@ inline size_type __device__ row_to_value_idx(size_type idx,
     } else {
       auto list_col = cudf::detail::lists_column_device_view(col);
       auto child    = list_col.child();
-      if (parquet_col.output_as_byte_array &&
-          (child.type().id() == type_id::INT8 || child.type().id() == type_id::UINT8)) {
-        break;
-      }
+      if (parquet_col.output_as_byte_array && child.type().id() == type_id::UINT8) { break; }
       idx = list_col.offset_at(idx);
       col = child;
     }
diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu
index 9514b053451..f2089d27a87 100644
--- a/cpp/src/io/parquet/writer_impl.cu
+++ b/cpp/src/io/parquet/writer_impl.cu
@@ -511,7 +511,7 @@ std::vector<schema_tree_node> construct_schema_tree(
         if (col->type().id() != type_id::LIST) { return false; }
         auto const child_col_type =
           col->children[lists_column_view::child_column_index]->type().id();
-        return child_col_type == type_id::INT8 or child_col_type == type_id::UINT8;
+        return child_col_type == type_id::UINT8;
       };
 
       // There is a special case for a list<int8> column with one byte column child. This column can
diff --git a/cpp/src/io/utilities/column_buffer.cpp b/cpp/src/io/utilities/column_buffer.cpp
index e2d209a7c0a..de145486662 100644
--- a/cpp/src/io/utilities/column_buffer.cpp
+++ b/cpp/src/io/utilities/column_buffer.cpp
@@ -22,6 +22,7 @@
 #include "column_buffer.hpp"
 #include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/strings/strings_column_view.hpp>
+#include <cudf/types.hpp>
 
 namespace cudf {
 namespace io {
@@ -78,7 +79,19 @@ std::unique_ptr<column> make_column(column_buffer& buffer,
         // convert to binary
         auto const string_col = make_strings_column(*buffer._strings, stream, mr);
         auto const num_rows   = string_col->size();
-        auto col_contest      = string_col->release();
+        auto col_content      = string_col->release();
+
+        // convert to uint8 column, strings are currently stored as int8
+        auto contents =
+          col_content.children[strings_column_view::chars_column_index].release()->release();
+        auto data      = contents.data.release();
+        auto null_mask = contents.null_mask.release();
+
+        auto uint8_col = std::make_unique<column>(data_type{type_id::UINT8},
+                                                  data->size(),
+                                                  std::move(*data),
+                                                  std::move(*null_mask),
+                                                  UNKNOWN_NULL_COUNT);
 
         if (schema_info != nullptr) {
           schema_info->children.push_back(column_name_info{"offsets"});
@@ -87,10 +100,10 @@ std::unique_ptr<column> make_column(column_buffer& buffer,
 
         return make_lists_column(
           num_rows,
-          std::move(col_contest.children[strings_column_view::offsets_column_index]),
-          std::move(col_contest.children[strings_column_view::chars_column_index]),
+          std::move(col_content.children[strings_column_view::offsets_column_index]),
+          std::move(uint8_col),
           UNKNOWN_NULL_COUNT,
-          std::move(*col_contest.null_mask));
+          std::move(*col_content.null_mask));
       }
 
     case type_id::LIST: {
diff --git a/cpp/src/io/utilities/column_utils.cuh b/cpp/src/io/utilities/column_utils.cuh
index fbeaaa9c0fc..598c93a1a4f 100644
--- a/cpp/src/io/utilities/column_utils.cuh
+++ b/cpp/src/io/utilities/column_utils.cuh
@@ -64,7 +64,7 @@ rmm::device_uvector<column_device_view> create_leaf_column_device_views(
     iter,
     iter + parent_table_device_view.num_columns(),
     [col_desc, parent_col_view = parent_table_device_view, leaf_columns] __device__(
-      size_type index) mutable {
+      size_type index) {
       col_desc[index].parent_column = parent_col_view.begin() + index;
       column_device_view col        = parent_col_view.column(index);
       // traverse till leaf column
@@ -74,7 +74,7 @@ rmm::device_uvector<column_device_view> create_leaf_column_device_views(
                              : col.child(0);
         // stop early if writing a byte array
         if (col_desc[index].stats_dtype == dtype_byte_array &&
-            (child.type().id() == type_id::INT8 || child.type().id() == type_id::UINT8)) {
+            child.type().id() == type_id::UINT8) {
           break;
         }
         col = child;
diff --git a/cpp/src/lists/dremel.cu b/cpp/src/lists/dremel.cu
index 25094536cce..66134138a5c 100644
--- a/cpp/src/lists/dremel.cu
+++ b/cpp/src/lists/dremel.cu
@@ -192,8 +192,7 @@ dremel_data get_dremel_data(column_view h_col,
     }
     if (curr_col.type().id() == type_id::LIST) {
       auto child = curr_col.child(lists_column_view::child_column_index);
-      if ((child.type().id() == type_id::INT8 || child.type().id() == type_id::UINT8) &&
-          output_as_byte_array) {
+      if (output_as_byte_array && child.type().id() == type_id::UINT8) {
         // consider this the bottom
         break;
       }
diff --git a/cpp/src/reshape/byte_cast.cu b/cpp/src/reshape/byte_cast.cu
index 639ddb33e9a..3d0510e1e6b 100644
--- a/cpp/src/reshape/byte_cast.cu
+++ b/cpp/src/reshape/byte_cast.cu
@@ -101,11 +101,21 @@ std::unique_ptr<cudf::column> byte_list_conversion::operator()<string_view>(
   auto strings_count = input_strings.size();
   if (strings_count == 0) return cudf::empty_like(input_column);
 
-  auto contents = std::make_unique<column>(input_column, stream, mr)->release();
+  auto col_content = std::make_unique<column>(input_column, stream, mr)->release();
+  auto contents =
+    col_content.children[strings_column_view::chars_column_index].release()->release();
+  auto data      = contents.data.release();
+  auto null_mask = contents.null_mask.release();
+  auto uint8_col = std::make_unique<column>(data_type{type_id::UINT8},
+                                            data->size(),
+                                            std::move(*data),
+                                            std::move(*null_mask),
+                                            UNKNOWN_NULL_COUNT);
+
   return make_lists_column(
     input_column.size(),
-    std::move(contents.children[cudf::strings_column_view::offsets_column_index]),
-    std::move(contents.children[cudf::strings_column_view::chars_column_index]),
+    std::move(col_content.children[cudf::strings_column_view::offsets_column_index]),
+    std::move(uint8_col),
     input_column.null_count(),
     detail::copy_bitmask(input_column, stream, mr),
     stream,
diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp
index 6f1c5ef7eb1..b13e875eabd 100644
--- a/cpp/tests/io/parquet_test.cpp
+++ b/cpp/tests/io/parquet_test.cpp
@@ -718,17 +718,17 @@ TEST_F(ParquetWriterTest, StringsAsBinary)
   column_wrapper<cudf::string_view> col0{ascii_strings.begin(), ascii_strings.end()};
   column_wrapper<cudf::string_view> col1{unicode_strings.begin(), unicode_strings.end()};
   column_wrapper<cudf::string_view> col2{ascii_strings.begin(), ascii_strings.end()};
-  cudf::test::lists_column_wrapper<int8_t> col3{{'M', 'o', 'n', 'd', 'a', 'y'},
-                                                {'W', 'e', 'd', 'n', 'e', 's', 'd', 'a', 'y'},
-                                                {'F', 'r', 'i', 'd', 'a', 'y'},
-                                                {'M', 'o', 'n', 'd', 'a', 'y'},
-                                                {'F', 'r', 'i', 'd', 'a', 'y'},
-                                                {'F', 'r', 'i', 'd', 'a', 'y'},
-                                                {'F', 'r', 'i', 'd', 'a', 'y'},
-                                                {'F', 'u', 'n', 'd', 'a', 'y'}};
-  cudf::test::lists_column_wrapper<int8_t> col4{
+  cudf::test::lists_column_wrapper<uint8_t> col3{{'M', 'o', 'n', 'd', 'a', 'y'},
+                                                 {'W', 'e', 'd', 'n', 'e', 's', 'd', 'a', 'y'},
+                                                 {'F', 'r', 'i', 'd', 'a', 'y'},
+                                                 {'M', 'o', 'n', 'd', 'a', 'y'},
+                                                 {'F', 'r', 'i', 'd', 'a', 'y'},
+                                                 {'F', 'r', 'i', 'd', 'a', 'y'},
+                                                 {'F', 'r', 'i', 'd', 'a', 'y'},
+                                                 {'F', 'u', 'n', 'd', 'a', 'y'}};
+  cudf::test::lists_column_wrapper<uint8_t> col4{
     {'M', 'o', 'n', 'd', 'a', 'y'},
-    {'W', -56, -123, 'd', 'n', -56, -123, 's', 'd', 'a', 'y'},
+    {'W', 200, 133, 'd', 'n', 200, 133, 's', 'd', 'a', 'y'},
     {'F', 'r', 'i', 'd', 'a', 'y'},
     {'M', 'o', 'n', 'd', 'a', 'y'},
     {'F', 'r', 'i', 'd', 'a', 'y'},
@@ -4459,13 +4459,13 @@ TEST_F(ParquetReaderTest, BinaryAsStrings)
 
   auto seq_col0 = random_values<int>(num_rows);
   auto seq_col2 = random_values<float>(num_rows);
-  auto seq_col3 = random_values<int8_t>(num_rows);
+  auto seq_col3 = random_values<uint8_t>(num_rows);
   auto validity = cudf::test::iterators::no_nulls();
 
   column_wrapper<int> int_col{seq_col0.begin(), seq_col0.end(), validity};
   column_wrapper<cudf::string_view> string_col{strings.begin(), strings.end()};
   column_wrapper<float> float_col{seq_col2.begin(), seq_col2.end(), validity};
-  cudf::test::lists_column_wrapper<int8_t> list_int_col{
+  cudf::test::lists_column_wrapper<uint8_t> list_int_col{
     {'M', 'o', 'n', 'd', 'a', 'y'},
     {'W', 'e', 'd', 'n', 'e', 's', 'd', 'a', 'y'},
     {'F', 'r', 'i', 'd', 'a', 'y'},
@@ -4526,12 +4526,12 @@ TEST_F(ParquetReaderTest, NestedByteArray)
 
   auto seq_col0       = random_values<int>(num_rows);
   auto seq_col2       = random_values<float>(num_rows);
-  auto seq_col3       = random_values<int8_t>(num_rows);
+  auto seq_col3       = random_values<uint8_t>(num_rows);
   auto const validity = cudf::test::iterators::no_nulls();
 
   column_wrapper<int> int_col{seq_col0.begin(), seq_col0.end(), validity};
   column_wrapper<float> float_col{seq_col2.begin(), seq_col2.end(), validity};
-  cudf::test::lists_column_wrapper<int8_t> list_list_int_col{
+  cudf::test::lists_column_wrapper<uint8_t> list_list_int_col{
     {{'M', 'o', 'n', 'd', 'a', 'y'},
      {'W', 'e', 'd', 'n', 'e', 's', 'd', 'a', 'y'},
      {'F', 'r', 'i', 'd', 'a', 'y'}},
@@ -4637,12 +4637,12 @@ TEST_F(ParquetReaderTest, StructByteArray)
 {
   constexpr auto num_rows = 100;
 
-  auto seq_col0       = random_values<int8_t>(num_rows);
+  auto seq_col0       = random_values<uint8_t>(num_rows);
   auto const validity = cudf::test::iterators::no_nulls();
 
-  column_wrapper<int8_t> int_col{seq_col0.begin(), seq_col0.end(), validity};
-  cudf::test::lists_column_wrapper<int8_t> list_of_int{{seq_col0.begin(), seq_col0.begin() + 50},
-                                                       {seq_col0.begin() + 50, seq_col0.end()}};
+  column_wrapper<uint8_t> int_col{seq_col0.begin(), seq_col0.end(), validity};
+  cudf::test::lists_column_wrapper<uint8_t> list_of_int{{seq_col0.begin(), seq_col0.begin() + 50},
+                                                        {seq_col0.begin() + 50, seq_col0.end()}};
   auto struct_col = cudf::test::structs_column_wrapper{{list_of_int}, validity};
 
   auto const expected = table_view{{struct_col}};
diff --git a/cpp/tests/reshape/byte_cast_tests.cpp b/cpp/tests/reshape/byte_cast_tests.cpp
index e5f3b8a1f7f..f29b3a7980f 100644
--- a/cpp/tests/reshape/byte_cast_tests.cpp
+++ b/cpp/tests/reshape/byte_cast_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -327,7 +327,7 @@ TEST_F(ByteCastTest, StringValues)
 {
   strings_column_wrapper const strings_col(
     {"", "The quick", " brown fox...", "!\"#$%&\'()*+,-./", "0123456789:;<=>?@", "[\\]^_`{|}~"});
-  lists_column_wrapper<int8_t> const strings_expected(
+  lists_column_wrapper<uint8_t> const strings_expected(
     {{},
      {0x54, 0x68, 0x65, 0x20, 0x71, 0x75, 0x69, 0x63, 0x6b},
      {0x20, 0x62, 0x72, 0x6f, 0x77, 0x6e, 0x20, 0x66, 0x6f, 0x78, 0x2e, 0x2e, 0x2e},
diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
index 7afd5abb358..f5c32b0da20 100644
--- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
+++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
@@ -3942,7 +3942,7 @@ void testCastStringToByteList() {
     "\\THE\t8\ud720", "tést strings", "", "éé");
         ColumnVector res = cv.asByteList(true);
         ColumnVector expected = ColumnVector.fromLists(new HostColumnVector.ListType(true,
-          new HostColumnVector.BasicType(true, DType.INT8)), list1, list2, list3, list4, list5,
+          new HostColumnVector.BasicType(true, DType.UINT8)), list1, list2, list3, list4, list5,
           list6, list7, list8)) {
       assertColumnsAreEqual(expected, res);
     }
diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java
index 4649a0e3507..f31da054091 100644
--- a/java/src/test/java/ai/rapids/cudf/TableTest.java
+++ b/java/src/test/java/ai/rapids/cudf/TableTest.java
@@ -621,7 +621,7 @@ void testParquetWriteToBufferChunkedBinary() {
     List<Byte> bin2 = asList(string2);
 
     try (Table binTable = new Table.TestBuilder()
-        .column(new ListType(true, new BasicType(false, DType.INT8)),
+        .column(new ListType(true, new BasicType(false, DType.UINT8)),
             bin1, bin2)
         .build();
          Table stringTable = new Table.TestBuilder()

From 5d5715933c97c118255010b57bb07bd58651e218 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 18 Oct 2022 11:46:57 -0700
Subject: [PATCH 045/202] Initial draft of policies and guidelines for libcudf
 usage. (#11853)

This PR adds a section to the developer documentation about various libcudf design decisions that affect users. These policies are important for us to document and communicate consistently. I am not sure what the best place for this information is, but I think the developer docs are a good place to start since until we address #11481 we don't have a great way to publish any non-API user-facing libcudf documentation. I've created this draft PR to solicit feedback from other libcudf devs about other policies that we should be documenting in a similar manner. Once everyone is happy with the contents, I would suggest that we merge this into the dev docs for now and then revisit a better place once we've tackled #11481.

Partly addresses #5505, #1781.

Resolves #4511.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Jake Hemstad (https://github.com/jrhemstad)
  - Bradley Dice (https://github.com/bdice)
  - David Wendt (https://github.com/davidwendt)

URL: https://github.com/rapidsai/cudf/pull/11853
---
 .../developer_guide/DEVELOPER_GUIDE.md        | 57 +++++++++++++++++++
 1 file changed, 57 insertions(+)

diff --git a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md
index b3774aeda38..52c443cd764 100644
--- a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md
+++ b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md
@@ -346,6 +346,63 @@ the device view can be obtained via function `column_device_view::create(column_
 data, a specialized device view for list columns can be constructed via
 `lists_column_device_view(column_device_view)`.
 
+# libcudf Policies and Design Principles
+
+`libcudf` is designed to provide thread-safe, single-GPU accelerated algorithm primitives for solving a wide variety of problems that arise in data science.
+APIs are written to execute on the default GPU, which can be controlled by the caller through standard CUDA device APIs or environment variables like `CUDA_VISIBLE_DEVICES`.
+Our goal is to enable diverse use cases like Spark or Pandas to benefit from the performance of GPUs, and libcudf relies on these higher-level layers like Spark or Dask to orchestrate multi-GPU tasks.
+
+To best satisfy these use cases, libcudf prioritizes performance and flexibility, which sometimes may come at the cost of convenience.
+While we welcome users to use libcudf directly, we design with the expectation that most users will be consuming libcudf through higher-level layers like Spark or cuDF Python that handle some of the details that direct users of libcudf must handle on their own.
+We document these policies and the reasons behind them here.
+
+## libcudf does not introspect data
+
+libcudf APIs generally do not perform deep introspection and validation of input data.
+There are numerous reasons for this:
+1. It violates the single responsibility principle: validation is separate from execution.
+2. Since libcudf data structures store data on the GPU, any validation incurs _at minimum_ the overhead of a kernel launch, and may in general be prohibitively expensive.
+3. API promises around data introspection often significantly complicate implementation.
+
+Users are therefore responsible for passing valid data into such APIs.
+_Note that this policy does not mean that libcudf performs no validation whatsoever_.
+libcudf APIs should still perform any validation that does not require introspection.
+To give some idea of what should or should not be validated, here are (non-exhaustive) lists of examples.
+
+**Things that libcudf should validate**:
+- Input column/table sizes or dtypes
+
+**Things that libcudf should not validate**:
+- Integer overflow
+- Ensuring that outputs will not exceed the 2GB size limit for a given set of inputs
+
+
+## libcudf expects nested types to have sanitized null masks
+
+Various libcudf APIs accepting columns of nested dtypes (such as `LIST` or `STRUCT`) may assume that these columns have been sanitized.
+In this context, sanitization refers to ensuring that the null elements in a column with a nested dtype are compatible with the elements of nested columns.
+Specifically:
+- Null elements of list columns should also be empty. The starting offset of a null element should be equal to the ending offset.
+- Null elements of struct columns should also be null elements in the underlying structs.
+- For compound columns, nulls should only be present at the level of the parent column. Child columns should not contain nulls.
+- Slice operations on nested columns do not propagate offsets to child columns.
+
+libcudf APIs _should_ promise to never return "dirty" columns, i.e. columns containing unsanitized data.
+Therefore, the only problem is if users construct input columns that are not correctly sanitized and then pass those into libcudf APIs.
+
+## Treat libcudf APIs as if they were asynchronous
+
+libcudf APIs called on the host do not guarantee that the stream is synchronized before returning.
+Work in libcudf occurs on `cudf::get_default_stream().value()`, which defaults to the CUDA default stream (stream 0).
+Note that the stream 0 behavior differs if [per-thread default stream is enabled](https://docs.nvidia.com/cuda/cuda-runtime-api/stream-sync-behavior.html) via `CUDF_USE_PER_THREAD_DEFAULT_STREAM`.
+Any data provided to or returned by libcudf that uses a separate non-blocking stream requires synchronization with the default libcudf stream to ensure stream safety.
+
+## libcudf generally does not make ordering guarantees
+
+Functions like merge or groupby in libcudf make no guarantees about the order of entries in the output.
+Promising deterministic ordering is not, in general, conducive to fast parallel algorithms.
+Calling code is responsible for performing sorts after the fact if sorted outputs are needed.
+
 # libcudf++ API and Implementation
 
 ## Streams

From 425fb029057858797a167f237c9dd6d2d93e2645 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Tue, 18 Oct 2022 15:47:03 -0500
Subject: [PATCH 046/202] Update flake8 to 5.0.4 and use flake8-force to check
 Cython. (#11736)

Resolves #11684, required for eventually supporting Python 3.10 (which requires flake8 >= 4.0.0). flake8 >= 4.0.0, however, does not support parsing Cython code, even with rule exclusions. This necessitates the flake8-force plugin, which was designed (by a cupy developer) for forcing flake8 to check Cython code with a limited set of rules.

Per this comment (https://github.com/rapidsai/cudf/issues/11684#issuecomment-1258747331), this PR removes duplicate pinnings between pre-commit configuration and the developer conda environment. Developers should use pre-commit for style checks consistent with the CI environment.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Lawrence Mitchell (https://github.com/wence-)
  - AJ Schmidt (https://github.com/ajschmidt8)
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/11736
---
 .pre-commit-config.yaml                    |   6 +-
 conda/environments/cudf_dev_cuda11.5.yml   |   8 +-
 python/cudf/cudf/_lib/cpp/io/avro.pxd      |  26 +-
 python/cudf/cudf/_lib/cpp/io/csv.pxd       | 290 ++++++++++-----------
 python/cudf/cudf/_lib/cpp/io/json.pxd      |  64 ++---
 python/cudf/cudf/_lib/cpp/io/orc.pxd       | 172 ++++++------
 python/cudf/cudf/_lib/cpp/io/parquet.pxd   |  62 ++---
 python/cudf/cudf/core/dataframe.py         |  16 +-
 python/cudf/cudf/core/indexed_frame.py     |   8 +-
 python/cudf/cudf/core/series.py            |   4 +-
 python/cudf/cudf/utils/hash_vocab_utils.py |   7 +-
 setup.cfg                                  |   9 +-
 12 files changed, 342 insertions(+), 330 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 1046f4ebe6f..b4e57947cf9 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -18,12 +18,14 @@ repos:
                 # Explicitly specify the pyproject.toml at the repo root, not per-project.
                 args: ["--config", "pyproject.toml"]
       - repo: https://github.com/PyCQA/flake8
-        rev: 3.8.3
+        rev: 5.0.4
         hooks:
               - id: flake8
                 args: ["--config=setup.cfg"]
-                files: python/.*\.(py|pyx|pxd)$
+                files: python/.*$
                 types: [file]
+                types_or: [python, cython]
+                additional_dependencies: ["flake8-force"]
       - repo: https://github.com/pre-commit/mirrors-mypy
         rev: 'v0.971'
         hooks:
diff --git a/conda/environments/cudf_dev_cuda11.5.yml b/conda/environments/cudf_dev_cuda11.5.yml
index e27e8557c80..d7178198358 100644
--- a/conda/environments/cudf_dev_cuda11.5.yml
+++ b/conda/environments/cudf_dev_cuda11.5.yml
@@ -38,15 +38,9 @@ dependencies:
   - ipython
   - pandoc<=2.0.0
   - cudatoolkit=11.5
-  - cuda-python >=11.5,<11.7.1
+  - cuda-python>=11.5,<11.7.1
   - pip
-  - flake8=3.8.3
-  - black=22.3.0
-  - isort=5.10.1
-  - mypy=0.971
-  - types-cachetools
   - doxygen=1.8.20
-  - pydocstyle=6.1.1
   - typing_extensions
   - pre-commit
   - dask>=2022.9.2
diff --git a/python/cudf/cudf/_lib/cpp/io/avro.pxd b/python/cudf/cudf/_lib/cpp/io/avro.pxd
index 6efe42e5208..9b683e5bce3 100644
--- a/python/cudf/cudf/_lib/cpp/io/avro.pxd
+++ b/python/cudf/cudf/_lib/cpp/io/avro.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libcpp.string cimport string
 from libcpp.vector cimport vector
@@ -11,17 +11,17 @@ cdef extern from "cudf/io/avro.hpp" \
         namespace "cudf::io" nogil:
 
     cdef cppclass avro_reader_options:
-        avro_reader_options() except+
-        cudf_io_types.source_info get_source() except+
-        vector[string] get_columns() except+
-        size_type get_skip_rows() except+
-        size_type get_num_rows() except+
+        avro_reader_options() except +
+        cudf_io_types.source_info get_source() except +
+        vector[string] get_columns() except +
+        size_type get_skip_rows() except +
+        size_type get_num_rows() except +
 
         # setters
 
-        void set_columns(vector[string] col_names) except+
-        void set_skip_rows(size_type val) except+
-        void set_num_rows(size_type val) except+
+        void set_columns(vector[string] col_names) except +
+        void set_skip_rows(size_type val) except +
+        void set_num_rows(size_type val) except +
 
         @staticmethod
         avro_reader_options_builder builder(
@@ -29,13 +29,13 @@ cdef extern from "cudf/io/avro.hpp" \
         ) except +
 
     cdef cppclass avro_reader_options_builder:
-        avro_reader_options_builder() except+
+        avro_reader_options_builder() except +
         avro_reader_options_builder(
             cudf_io_types.source_info src
         ) except +
-        avro_reader_options_builder& columns(vector[string] col_names) except+
-        avro_reader_options_builder& skip_rows(size_type val) except+
-        avro_reader_options_builder& num_rows(size_type val) except+
+        avro_reader_options_builder& columns(vector[string] col_names) except +
+        avro_reader_options_builder& skip_rows(size_type val) except +
+        avro_reader_options_builder& num_rows(size_type val) except +
 
         avro_reader_options build() except +
 
diff --git a/python/cudf/cudf/_lib/cpp/io/csv.pxd b/python/cudf/cudf/_lib/cpp/io/csv.pxd
index 4afd8732320..e8064557592 100644
--- a/python/cudf/cudf/_lib/cpp/io/csv.pxd
+++ b/python/cudf/cudf/_lib/cpp/io/csv.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libc.stdint cimport uint8_t
 from libcpp cimport bool
@@ -20,96 +20,96 @@ cdef extern from "cudf/io/csv.hpp" \
 
         # Getter
 
-        cudf_io_types.source_info get_source() except+
+        cudf_io_types.source_info get_source() except +
         # Reader settings
-        cudf_io_types.compression_type get_compression() except+
-        size_t get_byte_range_offset() except+
-        size_t get_byte_range_size() except+
-        vector[string] get_names() except+
-        string get_prefix() except+
-        bool is_enabled_mangle_dupe_cols() except+
+        cudf_io_types.compression_type get_compression() except +
+        size_t get_byte_range_offset() except +
+        size_t get_byte_range_size() except +
+        vector[string] get_names() except +
+        string get_prefix() except +
+        bool is_enabled_mangle_dupe_cols() except +
 
         # Filter settings
-        vector[string] get_use_cols_names() except+
-        vector[int] get_use_cols_indexes() except+
-        size_type get_nrows() except+
-        size_type get_skiprows() except+
-        size_type get_skipfooter() except+
-        size_type get_header() except+
+        vector[string] get_use_cols_names() except +
+        vector[int] get_use_cols_indexes() except +
+        size_type get_nrows() except +
+        size_type get_skiprows() except +
+        size_type get_skipfooter() except +
+        size_type get_header() except +
 
         # Parsing settings
-        char get_lineterminator() except+
-        char get_delimiter() except+
-        char get_thousands() except+
-        char get_decimal() except+
-        char get_comment() except+
-        bool is_enabled_windowslinetermination() except+
-        bool is_enabled_delim_whitespace() except+
-        bool is_enabled_skipinitialspace() except+
-        bool is_enabled_skip_blank_lines() except+
-        cudf_io_types.quote_style get_quoting() except+
-        char get_quotechar() except+
-        bool is_enabled_doublequote() except+
-        vector[string] get_parse_dates_names() except+
-        vector[int] get_parse_dates_indexes() except+
-        vector[string] get_parse_hex_names() except+
-        vector[int] get_parse_hex_indexes() except+
+        char get_lineterminator() except +
+        char get_delimiter() except +
+        char get_thousands() except +
+        char get_decimal() except +
+        char get_comment() except +
+        bool is_enabled_windowslinetermination() except +
+        bool is_enabled_delim_whitespace() except +
+        bool is_enabled_skipinitialspace() except +
+        bool is_enabled_skip_blank_lines() except +
+        cudf_io_types.quote_style get_quoting() except +
+        char get_quotechar() except +
+        bool is_enabled_doublequote() except +
+        vector[string] get_parse_dates_names() except +
+        vector[int] get_parse_dates_indexes() except +
+        vector[string] get_parse_hex_names() except +
+        vector[int] get_parse_hex_indexes() except +
 
         # Conversion settings
-        vector[string] get_dtype() except+
-        vector[string] get_true_values() except+
-        vector[string] get_false_values() except+
-        vector[string] get_na_values() except+
-        bool is_enabled_keep_default_na() except+
-        bool is_enabled_na_filter() except+
-        bool is_enabled_dayfirst() except+
+        vector[string] get_dtype() except +
+        vector[string] get_true_values() except +
+        vector[string] get_false_values() except +
+        vector[string] get_na_values() except +
+        bool is_enabled_keep_default_na() except +
+        bool is_enabled_na_filter() except +
+        bool is_enabled_dayfirst() except +
 
         # setter
 
         # Reader settings
-        void set_compression(cudf_io_types.compression_type comp) except+
-        void set_byte_range_offset(size_t val) except+
-        void set_byte_range_size(size_t val) except+
-        void set_names(vector[string] val) except+
-        void set_prefix(string pfx) except+
-        void set_mangle_dupe_cols(bool val) except+
+        void set_compression(cudf_io_types.compression_type comp) except +
+        void set_byte_range_offset(size_t val) except +
+        void set_byte_range_size(size_t val) except +
+        void set_names(vector[string] val) except +
+        void set_prefix(string pfx) except +
+        void set_mangle_dupe_cols(bool val) except +
 
         # Filter settings
-        void set_use_cols_names(vector[string] col_names) except+
-        void set_use_cols_indexes(vector[int] col_ind) except+
-        void set_nrows(size_type n_rows) except+
-        void set_skiprows(size_type val) except+
-        void set_skipfooter(size_type val) except+
-        void set_header(size_type hdr) except+
+        void set_use_cols_names(vector[string] col_names) except +
+        void set_use_cols_indexes(vector[int] col_ind) except +
+        void set_nrows(size_type n_rows) except +
+        void set_skiprows(size_type val) except +
+        void set_skipfooter(size_type val) except +
+        void set_header(size_type hdr) except +
 
         # Parsing settings
-        void set_lineterminator(char val) except+
-        void set_delimiter(char val) except+
-        void set_thousands(char val) except+
-        void set_decimal(char val) except+
-        void set_comment(char val) except+
-        void enable_windowslinetermination(bool val) except+
-        void enable_delim_whitespace(bool val) except+
-        void enable_skipinitialspace(bool val) except+
-        void enable_skip_blank_lines(bool val) except+
-        void set_quoting(cudf_io_types.quote_style style) except+
-        void set_quotechar(char val) except+
-        void set_doublequote(bool val) except+
-        void set_parse_dates(vector[string]) except+
-        void set_parse_dates(vector[int]) except+
-        void set_parse_hex(vector[string]) except+
-        void set_parse_hex(vector[int]) except+
+        void set_lineterminator(char val) except +
+        void set_delimiter(char val) except +
+        void set_thousands(char val) except +
+        void set_decimal(char val) except +
+        void set_comment(char val) except +
+        void enable_windowslinetermination(bool val) except +
+        void enable_delim_whitespace(bool val) except +
+        void enable_skipinitialspace(bool val) except +
+        void enable_skip_blank_lines(bool val) except +
+        void set_quoting(cudf_io_types.quote_style style) except +
+        void set_quotechar(char val) except +
+        void set_doublequote(bool val) except +
+        void set_parse_dates(vector[string]) except +
+        void set_parse_dates(vector[int]) except +
+        void set_parse_hex(vector[string]) except +
+        void set_parse_hex(vector[int]) except +
 
         # Conversion settings
-        void set_dtypes(vector[data_type] types) except+
-        void set_dtypes(map[string, data_type] types) except+
-        void set_true_values(vector[string] vals) except+
-        void set_false_values(vector[string] vals) except+
-        void set_na_values(vector[string] vals) except+
-        void enable_keep_default_na(bool val) except+
-        void enable_na_filter(bool val) except+
-        void enable_dayfirst(bool val) except+
-        void set_timestamp_type(data_type type) except+
+        void set_dtypes(vector[data_type] types) except +
+        void set_dtypes(map[string, data_type] types) except +
+        void set_true_values(vector[string] vals) except +
+        void set_false_values(vector[string] vals) except +
+        void set_na_values(vector[string] vals) except +
+        void enable_keep_default_na(bool val) except +
+        void enable_na_filter(bool val) except +
+        void enable_dayfirst(bool val) except +
+        void set_timestamp_type(data_type type) except +
 
         @staticmethod
         csv_reader_options_builder builder(
@@ -125,115 +125,115 @@ cdef extern from "cudf/io/csv.hpp" \
 
         csv_reader_options_builder& source(
             cudf_io_types.source_info info
-        ) except+
+        ) except +
         # Reader settings
         csv_reader_options_builder& compression(
             cudf_io_types.compression_type comp
-        ) except+
-        csv_reader_options_builder& byte_range_offset(size_t val) except+
-        csv_reader_options_builder& byte_range_size(size_t val) except+
-        csv_reader_options_builder& names(vector[string] val) except+
-        csv_reader_options_builder& prefix(string pfx) except+
-        csv_reader_options_builder& mangle_dupe_cols(bool val) except+
+        ) except +
+        csv_reader_options_builder& byte_range_offset(size_t val) except +
+        csv_reader_options_builder& byte_range_size(size_t val) except +
+        csv_reader_options_builder& names(vector[string] val) except +
+        csv_reader_options_builder& prefix(string pfx) except +
+        csv_reader_options_builder& mangle_dupe_cols(bool val) except +
 
         # Filter settings
         csv_reader_options_builder& use_cols_names(
             vector[string] col_names
-        ) except+
+        ) except +
         csv_reader_options_builder& use_cols_indexes(
             vector[int] col_ind
-        ) except+
-        csv_reader_options_builder& nrows(size_type n_rows) except+
-        csv_reader_options_builder& skiprows(size_type val) except+
-        csv_reader_options_builder& skipfooter(size_type val) except+
-        csv_reader_options_builder& header(size_type hdr) except+
+        ) except +
+        csv_reader_options_builder& nrows(size_type n_rows) except +
+        csv_reader_options_builder& skiprows(size_type val) except +
+        csv_reader_options_builder& skipfooter(size_type val) except +
+        csv_reader_options_builder& header(size_type hdr) except +
 
         # Parsing settings
-        csv_reader_options_builder& lineterminator(char val) except+
-        csv_reader_options_builder& delimiter(char val) except+
-        csv_reader_options_builder& thousands(char val) except+
-        csv_reader_options_builder& decimal(char val) except+
-        csv_reader_options_builder& comment(char val) except+
-        csv_reader_options_builder& windowslinetermination(bool val) except+
-        csv_reader_options_builder& delim_whitespace(bool val) except+
-        csv_reader_options_builder& skipinitialspace(bool val) except+
-        csv_reader_options_builder& skip_blank_lines(bool val) except+
+        csv_reader_options_builder& lineterminator(char val) except +
+        csv_reader_options_builder& delimiter(char val) except +
+        csv_reader_options_builder& thousands(char val) except +
+        csv_reader_options_builder& decimal(char val) except +
+        csv_reader_options_builder& comment(char val) except +
+        csv_reader_options_builder& windowslinetermination(bool val) except +
+        csv_reader_options_builder& delim_whitespace(bool val) except +
+        csv_reader_options_builder& skipinitialspace(bool val) except +
+        csv_reader_options_builder& skip_blank_lines(bool val) except +
         csv_reader_options_builder& quoting(
             cudf_io_types.quote_style style
-        ) except+
-        csv_reader_options_builder& quotechar(char val) except+
-        csv_reader_options_builder& doublequote(bool val) except+
-        csv_reader_options_builder& parse_dates(vector[string]) except+
-        csv_reader_options_builder& parse_dates(vector[int]) except+
+        ) except +
+        csv_reader_options_builder& quotechar(char val) except +
+        csv_reader_options_builder& doublequote(bool val) except +
+        csv_reader_options_builder& parse_dates(vector[string]) except +
+        csv_reader_options_builder& parse_dates(vector[int]) except +
 
         # Conversion settings
-        csv_reader_options_builder& dtypes(vector[string] types) except+
-        csv_reader_options_builder& dtypes(vector[data_type] types) except+
+        csv_reader_options_builder& dtypes(vector[string] types) except +
+        csv_reader_options_builder& dtypes(vector[data_type] types) except +
         csv_reader_options_builder& dtypes(
             map[string, data_type] types
-        ) except+
-        csv_reader_options_builder& true_values(vector[string] vals) except+
-        csv_reader_options_builder& false_values(vector[string] vals) except+
-        csv_reader_options_builder& na_values(vector[string] vals) except+
-        csv_reader_options_builder& keep_default_na(bool val) except+
-        csv_reader_options_builder& na_filter(bool val) except+
-        csv_reader_options_builder& dayfirst(bool val) except+
-        csv_reader_options_builder& timestamp_type(data_type type) except+
+        ) except +
+        csv_reader_options_builder& true_values(vector[string] vals) except +
+        csv_reader_options_builder& false_values(vector[string] vals) except +
+        csv_reader_options_builder& na_values(vector[string] vals) except +
+        csv_reader_options_builder& keep_default_na(bool val) except +
+        csv_reader_options_builder& na_filter(bool val) except +
+        csv_reader_options_builder& dayfirst(bool val) except +
+        csv_reader_options_builder& timestamp_type(data_type type) except +
 
-        csv_reader_options build() except+
+        csv_reader_options build() except +
 
     cdef cudf_io_types.table_with_metadata read_csv(
         csv_reader_options &options
     ) except +
 
     cdef cppclass csv_writer_options:
-        csv_writer_options() except+
-
-        cudf_io_types.sink_info get_sink() except+
-        cudf_table_view.table_view get_table() except+
-        cudf_io_types.table_metadata get_metadata() except+
-        string get_na_rep() except+
-        bool is_enabled_include_header() except+
-        size_type get_rows_per_chunk() except+
-        string get_line_terminator() except+
-        char get_inter_column_delimiter() except+
-        string get_true_value() except+
-        string get_false_value() except+
+        csv_writer_options() except +
+
+        cudf_io_types.sink_info get_sink() except +
+        cudf_table_view.table_view get_table() except +
+        cudf_io_types.table_metadata get_metadata() except +
+        string get_na_rep() except +
+        bool is_enabled_include_header() except +
+        size_type get_rows_per_chunk() except +
+        string get_line_terminator() except +
+        char get_inter_column_delimiter() except +
+        string get_true_value() except +
+        string get_false_value() except +
 
         # setter
-        void set_metadata(cudf_io_types.table_metadata* val) except+
-        void set_na_rep(string val) except+
-        void enable_include_header(bool val) except+
-        void set_rows_per_chunk(size_type val) except+
-        void set_line_terminator(string term) except+
-        void set_inter_column_delimiter(char delim) except+
-        void set__true_value(string val) except+
-        void set_false_value(string val) except+
+        void set_metadata(cudf_io_types.table_metadata* val) except +
+        void set_na_rep(string val) except +
+        void enable_include_header(bool val) except +
+        void set_rows_per_chunk(size_type val) except +
+        void set_line_terminator(string term) except +
+        void set_inter_column_delimiter(char delim) except +
+        void set__true_value(string val) except +
+        void set_false_value(string val) except +
 
         @staticmethod
         csv_writer_options_builder builder(
             cudf_io_types.sink_info sink,
             cudf_table_view.table_view table
-        ) except+
+        ) except +
 
     cdef cppclass csv_writer_options_builder:
-        csv_writer_options_builder() except+
+        csv_writer_options_builder() except +
         csv_writer_options_builder(
             cudf_io_types.sink_info sink,
             cudf_table_view.table_view table
-        ) except+
+        ) except +
 
         csv_writer_options_builder& metadata(
             cudf_io_types.table_metadata* val
-        ) except+
-        csv_writer_options_builder& na_rep(string val) except+
-        csv_writer_options_builder& include_header(bool val) except+
-        csv_writer_options_builder& rows_per_chunk(size_type val) except+
-        csv_writer_options_builder& line_terminator(string term) except+
-        csv_writer_options_builder& inter_column_delimiter(char delim) except+
-        csv_writer_options_builder& true_value(string val) except+
-        csv_writer_options_builder& false_value(string val) except+
-
-        csv_writer_options build() except+
+        ) except +
+        csv_writer_options_builder& na_rep(string val) except +
+        csv_writer_options_builder& include_header(bool val) except +
+        csv_writer_options_builder& rows_per_chunk(size_type val) except +
+        csv_writer_options_builder& line_terminator(string term) except +
+        csv_writer_options_builder& inter_column_delimiter(char delim) except +
+        csv_writer_options_builder& true_value(string val) except +
+        csv_writer_options_builder& false_value(string val) except +
+
+        csv_writer_options build() except +
 
     cdef void write_csv(csv_writer_options args) except +
diff --git a/python/cudf/cudf/_lib/cpp/io/json.pxd b/python/cudf/cudf/_lib/cpp/io/json.pxd
index 7333aad7ddf..ab87e2cbb4b 100644
--- a/python/cudf/cudf/_lib/cpp/io/json.pxd
+++ b/python/cudf/cudf/_lib/cpp/io/json.pxd
@@ -20,71 +20,71 @@ cdef extern from "cudf/io/json.hpp" \
         map[string, schema_element] child_types
 
     cdef cppclass json_reader_options:
-        json_reader_options() except+
-        cudf_io_types.source_info get_source() except+
-        vector[string] get_dtypes() except+
+        json_reader_options() except +
+        cudf_io_types.source_info get_source() except +
+        vector[string] get_dtypes() except +
         cudf_io_types.compression_type get_compression() except +
-        size_type get_byte_range_offset() except+
-        size_type get_byte_range_size() except+
-        bool is_enabled_lines() except+
-        bool is_enabled_dayfirst() except+
-        bool is_enabled_experimental() except+
+        size_type get_byte_range_offset() except +
+        size_type get_byte_range_size() except +
+        bool is_enabled_lines() except +
+        bool is_enabled_dayfirst() except +
+        bool is_enabled_experimental() except +
 
         # setter
-        void set_dtypes(vector[data_type] types) except+
-        void set_dtypes(map[string, schema_element] types) except+
+        void set_dtypes(vector[data_type] types) except +
+        void set_dtypes(map[string, schema_element] types) except +
         void set_compression(
             cudf_io_types.compression_type compression
-        ) except+
-        void set_byte_range_offset(size_type offset) except+
-        void set_byte_range_size(size_type size) except+
-        void enable_lines(bool val) except+
-        void enable_dayfirst(bool val) except+
-        void enable_experimental(bool val) except+
-        void enable_keep_quotes(bool val) except+
+        ) except +
+        void set_byte_range_offset(size_type offset) except +
+        void set_byte_range_size(size_type size) except +
+        void enable_lines(bool val) except +
+        void enable_dayfirst(bool val) except +
+        void enable_experimental(bool val) except +
+        void enable_keep_quotes(bool val) except +
 
         @staticmethod
         json_reader_options_builder builder(
             cudf_io_types.source_info src
-        ) except+
+        ) except +
 
     cdef cppclass json_reader_options_builder:
-        json_reader_options_builder() except+
+        json_reader_options_builder() except +
         json_reader_options_builder(
             cudf_io_types.source_info src
-        ) except+
+        ) except +
         json_reader_options_builder& dtypes(
             vector[string] types
-        ) except+
+        ) except +
         json_reader_options_builder& dtypes(
             vector[data_type] types
-        ) except+
+        ) except +
         json_reader_options_builder& dtypes(
             map[string, schema_element] types
-        ) except+
+        ) except +
         json_reader_options_builder& compression(
             cudf_io_types.compression_type compression
-        ) except+
+        ) except +
         json_reader_options_builder& byte_range_offset(
             size_type offset
-        ) except+
+        ) except +
         json_reader_options_builder& byte_range_size(
             size_type size
-        ) except+
+        ) except +
         json_reader_options_builder& lines(
             bool val
-        ) except+
+        ) except +
         json_reader_options_builder& dayfirst(
             bool val
-        ) except+
+        ) except +
         json_reader_options_builder& experimental(
             bool val
-        ) except+
+        ) except +
         json_reader_options_builder& keep_quotes(
             bool val
-        ) except+
+        ) except +
 
-        json_reader_options build() except+
+        json_reader_options build() except +
 
     cdef cudf_io_types.table_with_metadata read_json(
-        json_reader_options &options) except+
+        json_reader_options &options) except +
diff --git a/python/cudf/cudf/_lib/cpp/io/orc.pxd b/python/cudf/cudf/_lib/cpp/io/orc.pxd
index 3e44ef98348..ec26fff3779 100644
--- a/python/cudf/cudf/_lib/cpp/io/orc.pxd
+++ b/python/cudf/cudf/_lib/cpp/io/orc.pxd
@@ -16,45 +16,45 @@ cdef extern from "cudf/io/orc.hpp" \
         namespace "cudf::io" nogil:
 
     cdef cppclass orc_reader_options:
-        orc_reader_options() except+
-
-        cudf_io_types.source_info get_source() except+
-        vector[vector[size_type]] get_stripes() except+
-        size_type get_skip_rows() except+
-        size_type get_num_rows() except+
-        bool is_enabled_use_index() except+
-        bool is_enabled_use_np_dtypes() except+
-        data_type get_timestamp_type() except+
-        bool is_enabled_decimals_as_float64() except+
-        int get_forced_decimals_scale() except+
-
-        void set_columns(vector[string] col_names) except+
-        void set_stripes(vector[vector[size_type]] strps) except+
-        void set_skip_rows(size_type rows) except+
-        void set_num_rows(size_type nrows) except+
-        void enable_use_index(bool val) except+
-        void enable_use_np_dtypes(bool val) except+
-        void set_timestamp_type(data_type type) except+
+        orc_reader_options() except +
+
+        cudf_io_types.source_info get_source() except +
+        vector[vector[size_type]] get_stripes() except +
+        size_type get_skip_rows() except +
+        size_type get_num_rows() except +
+        bool is_enabled_use_index() except +
+        bool is_enabled_use_np_dtypes() except +
+        data_type get_timestamp_type() except +
+        bool is_enabled_decimals_as_float64() except +
+        int get_forced_decimals_scale() except +
+
+        void set_columns(vector[string] col_names) except +
+        void set_stripes(vector[vector[size_type]] strps) except +
+        void set_skip_rows(size_type rows) except +
+        void set_num_rows(size_type nrows) except +
+        void enable_use_index(bool val) except +
+        void enable_use_np_dtypes(bool val) except +
+        void set_timestamp_type(data_type type) except +
 
         @staticmethod
         orc_reader_options_builder builder(
             cudf_io_types.source_info src
-        ) except+
+        ) except +
 
     cdef cppclass orc_reader_options_builder:
-        orc_reader_options_builder() except+
-        orc_reader_options_builder(cudf_io_types.source_info &src) except+
+        orc_reader_options_builder() except +
+        orc_reader_options_builder(cudf_io_types.source_info &src) except +
 
-        orc_reader_options_builder& columns(vector[string] col_names) except+
+        orc_reader_options_builder& columns(vector[string] col_names) except +
         orc_reader_options_builder& \
-            stripes(vector[vector[size_type]] strps) except+
-        orc_reader_options_builder& skip_rows(size_type rows) except+
-        orc_reader_options_builder& num_rows(size_type nrows) except+
-        orc_reader_options_builder& use_index(bool val) except+
-        orc_reader_options_builder& use_np_dtypes(bool val) except+
-        orc_reader_options_builder& timestamp_type(data_type type) except+
+            stripes(vector[vector[size_type]] strps) except +
+        orc_reader_options_builder& skip_rows(size_type rows) except +
+        orc_reader_options_builder& num_rows(size_type nrows) except +
+        orc_reader_options_builder& use_index(bool val) except +
+        orc_reader_options_builder& use_np_dtypes(bool val) except +
+        orc_reader_options_builder& timestamp_type(data_type type) except +
 
-        orc_reader_options build() except+
+        orc_reader_options build() except +
 
     cdef cudf_io_types.table_with_metadata read_orc(
         orc_reader_options opts
@@ -62,108 +62,110 @@ cdef extern from "cudf/io/orc.hpp" \
 
     cdef cppclass orc_writer_options:
         orc_writer_options()
-        cudf_io_types.sink_info get_sink() except+
-        cudf_io_types.compression_type get_compression() except+
-        bool is_enabled_statistics() except+
-        size_t get_stripe_size_bytes() except+
-        size_type get_stripe_size_rows() except+
-        size_type get_row_index_stride() except+
-        cudf_table_view.table_view get_table() except+
-        const cudf_io_types.table_input_metadata *get_metadata() except+
+        cudf_io_types.sink_info get_sink() except +
+        cudf_io_types.compression_type get_compression() except +
+        bool is_enabled_statistics() except +
+        size_t get_stripe_size_bytes() except +
+        size_type get_stripe_size_rows() except +
+        size_type get_row_index_stride() except +
+        cudf_table_view.table_view get_table() except +
+        const cudf_io_types.table_input_metadata *get_metadata() except +
 
         # setter
-        void set_compression(cudf_io_types.compression_type comp) except+
-        void enable_statistics(bool val) except+
-        void set_stripe_size_bytes(size_t val) except+
-        void set_stripe_size_rows(size_type val) except+
-        void set_row_index_stride(size_type val) except+
-        void set_table(cudf_table_view.table_view tbl) except+
-        void set_metadata(cudf_io_types.table_input_metadata* meta) except+
+        void set_compression(cudf_io_types.compression_type comp) except +
+        void enable_statistics(bool val) except +
+        void set_stripe_size_bytes(size_t val) except +
+        void set_stripe_size_rows(size_type val) except +
+        void set_row_index_stride(size_type val) except +
+        void set_table(cudf_table_view.table_view tbl) except +
+        void set_metadata(cudf_io_types.table_input_metadata* meta) except +
         void set_key_value_metadata(map[string, string] kvm) except +
 
         @staticmethod
         orc_writer_options_builder builder(
             cudf_io_types.sink_info &sink,
             cudf_table_view.table_view &tbl
-        ) except+
+        ) except +
 
     cdef cppclass orc_writer_options_builder:
         # setter
         orc_writer_options_builder& compression(
             cudf_io_types.compression_type comp
-        ) except+
-        orc_writer_options_builder& enable_statistics(bool val) except+
-        orc_writer_options_builder& stripe_size_bytes(size_t val) except+
-        orc_writer_options_builder& stripe_size_rows(size_type val) except+
-        orc_writer_options_builder& row_index_stride(size_type val) except+
+        ) except +
+        orc_writer_options_builder& enable_statistics(bool val) except +
+        orc_writer_options_builder& stripe_size_bytes(size_t val) except +
+        orc_writer_options_builder& stripe_size_rows(size_type val) except +
+        orc_writer_options_builder& row_index_stride(size_type val) except +
         orc_writer_options_builder& table(
             cudf_table_view.table_view tbl
-        ) except+
+        ) except +
         orc_writer_options_builder& metadata(
             cudf_io_types.table_input_metadata *meta
-        ) except+
+        ) except +
         orc_writer_options_builder& key_value_metadata(
             map[string, string] kvm
-        ) except+
+        ) except +
 
-        orc_writer_options build() except+
+        orc_writer_options build() except +
 
     cdef void write_orc(orc_writer_options options) except +
 
     cdef cppclass chunked_orc_writer_options:
-        chunked_orc_writer_options() except+
-        cudf_io_types.sink_info get_sink() except+
-        cudf_io_types.compression_type get_compression() except+
-        bool enable_statistics() except+
-        size_t stripe_size_bytes() except+
-        size_type stripe_size_rows() except+
-        size_type row_index_stride() except+
-        cudf_table_view.table_view get_table() except+
+        chunked_orc_writer_options() except +
+        cudf_io_types.sink_info get_sink() except +
+        cudf_io_types.compression_type get_compression() except +
+        bool enable_statistics() except +
+        size_t stripe_size_bytes() except +
+        size_type stripe_size_rows() except +
+        size_type row_index_stride() except +
+        cudf_table_view.table_view get_table() except +
         const cudf_io_types.table_input_metadata *get_metadata(
-        ) except+
+        ) except +
 
         # setter
-        void set_compression(cudf_io_types.compression_type comp) except+
-        void enable_statistics(bool val) except+
-        void set_stripe_size_bytes(size_t val) except+
-        void set_stripe_size_rows(size_type val) except+
-        void set_row_index_stride(size_type val) except+
-        void set_table(cudf_table_view.table_view tbl) except+
+        void set_compression(cudf_io_types.compression_type comp) except +
+        void enable_statistics(bool val) except +
+        void set_stripe_size_bytes(size_t val) except +
+        void set_stripe_size_rows(size_type val) except +
+        void set_row_index_stride(size_type val) except +
+        void set_table(cudf_table_view.table_view tbl) except +
         void set_metadata(
             cudf_io_types.table_input_metadata* meta
-        ) except+
+        ) except +
         void set_key_value_metadata(map[string, string] kvm) except +
 
         @staticmethod
         chunked_orc_writer_options_builder builder(
             cudf_io_types.sink_info &sink
-        ) except+
+        ) except +
 
     cdef cppclass chunked_orc_writer_options_builder:
         # setter
         chunked_orc_writer_options_builder& compression(
             cudf_io_types.compression_type comp
-        ) except+
-        chunked_orc_writer_options_builder& enable_statistics(bool val) except+
-        orc_writer_options_builder& stripe_size_bytes(size_t val) except+
-        orc_writer_options_builder& stripe_size_rows(size_type val) except+
-        orc_writer_options_builder& row_index_stride(size_type val) except+
+        ) except +
+        chunked_orc_writer_options_builder& enable_statistics(
+            bool val
+        ) except +
+        orc_writer_options_builder& stripe_size_bytes(size_t val) except +
+        orc_writer_options_builder& stripe_size_rows(size_type val) except +
+        orc_writer_options_builder& row_index_stride(size_type val) except +
         chunked_orc_writer_options_builder& table(
             cudf_table_view.table_view tbl
-        ) except+
+        ) except +
         chunked_orc_writer_options_builder& metadata(
             cudf_io_types.table_input_metadata *meta
-        ) except+
+        ) except +
         chunked_orc_writer_options_builder& key_value_metadata(
             map[string, string] kvm
-        ) except+
+        ) except +
 
-        chunked_orc_writer_options build() except+
+        chunked_orc_writer_options build() except +
 
     cdef cppclass orc_chunked_writer:
-        orc_chunked_writer() except+
-        orc_chunked_writer(chunked_orc_writer_options args) except+
+        orc_chunked_writer() except +
+        orc_chunked_writer(chunked_orc_writer_options args) except +
         orc_chunked_writer& write(
             cudf_table_view.table_view table_,
-        ) except+
-        void close() except+
+        ) except +
+        void close() except +
diff --git a/python/cudf/cudf/_lib/cpp/io/parquet.pxd b/python/cudf/cudf/_lib/cpp/io/parquet.pxd
index f388fff3beb..98b839ba9b8 100644
--- a/python/cudf/cudf/_lib/cpp/io/parquet.pxd
+++ b/python/cudf/cudf/_lib/cpp/io/parquet.pxd
@@ -66,11 +66,11 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil:
         cudf_io_types.statistics_freq get_stats_level() except +
         cudf_table_view.table_view get_table() except +
         const cudf_io_types.table_input_metadata get_metadata() except +
-        string get_column_chunks_file_paths() except+
-        size_t get_row_group_size_bytes() except+
-        size_type get_row_group_size_rows() except+
-        size_t get_max_page_size_bytes() except+
-        size_type get_max_page_size_rows() except+
+        string get_column_chunks_file_paths() except +
+        size_t get_row_group_size_bytes() except +
+        size_type get_row_group_size_rows() except +
+        size_t get_max_page_size_bytes() except +
+        size_type get_max_page_size_rows() except +
 
         void set_partitions(
             vector[cudf_io_types.partition_info] partitions
@@ -90,10 +90,10 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil:
         void set_column_chunks_file_paths(
             vector[string] column_chunks_file_paths
         ) except +
-        void set_row_group_size_bytes(size_t val) except+
-        void set_row_group_size_rows(size_type val) except+
-        void set_max_page_size_bytes(size_t val) except+
-        void set_max_page_size_rows(size_type val) except+
+        void set_row_group_size_bytes(size_t val) except +
+        void set_row_group_size_rows(size_type val) except +
+        void set_max_page_size_bytes(size_t val) except +
+        void set_max_page_size_rows(size_type val) except +
 
         @staticmethod
         parquet_writer_options_builder builder(
@@ -131,16 +131,16 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil:
         ) except +
         parquet_writer_options_builder& row_group_size_bytes(
             size_t val
-        ) except+
+        ) except +
         parquet_writer_options_builder& row_group_size_rows(
             size_type val
-        ) except+
+        ) except +
         parquet_writer_options_builder& max_page_size_bytes(
             size_t val
-        ) except+
+        ) except +
         parquet_writer_options_builder& max_page_size_rows(
             size_type val
-        ) except+
+        ) except +
 
         parquet_writer_options build() except +
 
@@ -154,11 +154,11 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil:
         cudf_io_types.compression_type get_compression() except +
         cudf_io_types.statistics_freq get_stats_level() except +
         cudf_io_types.table_input_metadata* get_metadata(
-        ) except+
-        size_t get_row_group_size_bytes() except+
-        size_type get_row_group_size_rows() except+
-        size_t get_max_page_size_bytes() except+
-        size_type get_max_page_size_rows() except+
+        ) except +
+        size_t get_row_group_size_bytes() except +
+        size_type get_row_group_size_rows() except +
+        size_t get_max_page_size_bytes() except +
+        size_type get_max_page_size_rows() except +
 
         void set_metadata(
             cudf_io_types.table_input_metadata *m
@@ -172,10 +172,10 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil:
         void set_compression(
             cudf_io_types.compression_type compression
         ) except +
-        void set_row_group_size_bytes(size_t val) except+
-        void set_row_group_size_rows(size_type val) except+
-        void set_max_page_size_bytes(size_t val) except+
-        void set_max_page_size_rows(size_type val) except+
+        void set_row_group_size_bytes(size_t val) except +
+        void set_row_group_size_rows(size_type val) except +
+        void set_max_page_size_bytes(size_t val) except +
+        void set_max_page_size_rows(size_type val) except +
 
         @staticmethod
         chunked_parquet_writer_options_builder builder(
@@ -201,32 +201,32 @@ cdef extern from "cudf/io/parquet.hpp" namespace "cudf::io" nogil:
         ) except +
         chunked_parquet_writer_options_builder& row_group_size_bytes(
             size_t val
-        ) except+
+        ) except +
         chunked_parquet_writer_options_builder& row_group_size_rows(
             size_type val
-        ) except+
+        ) except +
         chunked_parquet_writer_options_builder& max_page_size_bytes(
             size_t val
-        ) except+
+        ) except +
         chunked_parquet_writer_options_builder& max_page_size_rows(
             size_type val
-        ) except+
+        ) except +
 
         chunked_parquet_writer_options build() except +
 
     cdef cppclass parquet_chunked_writer:
-        parquet_chunked_writer() except+
-        parquet_chunked_writer(chunked_parquet_writer_options args) except+
+        parquet_chunked_writer() except +
+        parquet_chunked_writer(chunked_parquet_writer_options args) except +
         parquet_chunked_writer& write(
             cudf_table_view.table_view table_,
-        ) except+
+        ) except +
         parquet_chunked_writer& write(
             const cudf_table_view.table_view& table_,
             const vector[cudf_io_types.partition_info]& partitions,
-        ) except+
+        ) except +
         unique_ptr[vector[uint8_t]] close(
             vector[string] column_chunks_file_paths,
-        ) except+
+        ) except +
 
     cdef unique_ptr[vector[uint8_t]] merge_row_group_metadata(
         const vector[unique_ptr[vector[uint8_t]]]& metadata_list
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index f00c7d1f2b5..126da0f883a 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -3333,7 +3333,11 @@ def agg(self, aggs, axis=None):
 
     @_cudf_nvtx_annotate
     def nlargest(self, n, columns, keep="first"):
-        """Get the rows of the DataFrame sorted by the n largest value of *columns*
+        """Return the first *n* rows ordered by *columns* in descending order.
+
+        Return the first *n* rows with the largest values in *columns*, in
+        descending order. The columns that are not specified are returned as
+        well, but not used for ordering.
 
         Parameters
         ----------
@@ -3396,7 +3400,11 @@ def nlargest(self, n, columns, keep="first"):
         return self._n_largest_or_smallest(True, n, columns, keep)
 
     def nsmallest(self, n, columns, keep="first"):
-        """Get the rows of the DataFrame sorted by the n smallest value of *columns*
+        """Return the first *n* rows ordered by *columns* in ascending order.
+
+        Return the first *n* rows with the smallest values in *columns*, in
+        ascending order. The columns that are not specified are returned as
+        well, but not used for ordering.
 
         Parameters
         ----------
@@ -5879,7 +5887,7 @@ def _columns_view(self, columns):
 
     @_cudf_nvtx_annotate
     def select_dtypes(self, include=None, exclude=None):
-        """Return a subset of the DataFrame’s columns based on the column dtypes.
+        """Return a subset of the DataFrame's columns based on the column dtypes.
 
         Parameters
         ----------
@@ -5938,7 +5946,7 @@ def select_dtypes(self, include=None, exclude=None):
         3  False  2.0
         4   True  1.0
         5  False  2.0
-        """
+        """  # noqa: E501
 
         # code modified from:
         # https://github.com/pandas-dev/pandas/blob/master/pandas/core/frame.py#L3196
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 0acacc798a1..bbb1c95bef6 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -4719,10 +4719,12 @@ def _drop_rows_by_labels(
     level: Union[int, str],
     errors: str,
 ) -> DataFrameOrSeries:
-    """Remove rows specified by `labels`. If `errors="raise"`, an error is raised
-    if some items in `labels` do not exist in `obj._index`.
+    """Remove rows specified by `labels`.
 
-    Will raise if level(int) is greater or equal to index nlevels
+    If `errors="raise"`, an error is raised if some items in `labels` do not
+    exist in `obj._index`.
+
+    Will raise if level(int) is greater or equal to index nlevels.
     """
     if isinstance(level, int) and level >= obj.index.nlevels:
         raise ValueError("Param level out of bounds.")
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 7493202a3d1..07e1782d788 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -889,7 +889,7 @@ def reindex(self, *args, **kwargs):
             DataFrame, followed by the original Series values. When `drop` is
             True, a `Series` is returned. In either case, if ``inplace=True``,
             no value is returned.
-""",
+""",  # noqa: E501
             example="""
         >>> series = cudf.Series(['a', 'b', 'c', 'd'], index=[10, 11, 12, 13])
         >>> series
@@ -2998,7 +2998,7 @@ def describe(
 
     @_cudf_nvtx_annotate
     def digitize(self, bins, right=False):
-        """Return the indices of the bins to which each value in series belongs.
+        """Return the indices of the bins to which each value belongs.
 
         Notes
         -----
diff --git a/python/cudf/cudf/utils/hash_vocab_utils.py b/python/cudf/cudf/utils/hash_vocab_utils.py
index cecf0c36bc2..a0915951240 100644
--- a/python/cudf/cudf/utils/hash_vocab_utils.py
+++ b/python/cudf/cudf/utils/hash_vocab_utils.py
@@ -253,9 +253,10 @@ def hash_vocab(
 
     hashed_vocab = {_sdbm_hash(key): value for key, value in vocab.items()}
 
-    error_message = """Collision occurred and only sdbm token hash current supported :(
-      Can be extended to use random hashes if needed"""
-
+    error_message = (
+        "A collision occurred and only sdbm token hash is currently "
+        "supported. This can be extended to use random hashes if needed."
+    )
     assert len(hashed_vocab) == len(vocab), error_message
 
     (
diff --git a/setup.cfg b/setup.cfg
index d196e8605b2..d810178c44b 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,8 +1,9 @@
 # Copyright (c) 2017-2022, NVIDIA CORPORATION.
 
 [flake8]
-filename = *.py, *.pyx, *.pxd
+filename = *.py, *.pyx, *.pxd, *.pxi
 exclude = __init__.py, *.egg, build, docs, .git
+force-check = True
 ignore =
     # line break before binary operator
     W503,
@@ -14,11 +15,13 @@ per-file-ignores =
     # E225: Missing whitespace around operators (breaks cython casting syntax like <int>)
     # E226: Missing whitespace around arithmetic operators (breaks cython pointer syntax like int*)
     # E227: Missing whitespace around bitwise or shift operator (Can also break casting syntax)
+    # E275: Missing whitespace after keyword (Doesn't work with Cython except?)
     # E402: invalid syntax (works for Python, not Cython)
     # E999: invalid syntax (works for Python, not Cython)
     # W504: line break after binary operator (breaks lines that end with a pointer)
-    *.pyx: E211, E225, E226, E227, E402, E999, W504
-    *.pxd: E211, E225, E226, E227, E402, E999, W504
+    *.pyx: E211, E225, E226, E227, E275, E402, E999, W504
+    *.pxd: E211, E225, E226, E227, E275, E402, E999, W504
+    *.pxi: E211, E225, E226, E227, E275, E402, E999, W504
 
 [pydocstyle]
 # Due to https://github.com/PyCQA/pydocstyle/issues/363, we must exclude rather

From 6ca2ceb8e200d55f1f681a4ca086614a28d67ad1 Mon Sep 17 00:00:00 2001
From: Alessandro Bellina <abellina@gmail.com>
Date: Tue, 18 Oct 2022 17:23:42 -0500
Subject: [PATCH 047/202] Adds retryCount to RmmEventHandler.onAllocFailure
 (#11940)

This adds the method `boolean onAllocFailure(long sizeRequested, int retryCount)` to `RmmEventHandler`, to help handling code keep track of the number of times an allocation failure has been retried.

With this code callers can perform extra logic that depends on whether the callback was due to a brand new allocation failure, or one that has failed in the past and is being retried.

This will be used here: https://github.com/NVIDIA/spark-rapids/issues/6768

Authors:
  - Alessandro Bellina (https://github.com/abellina)

Approvers:
  - Jason Lowe (https://github.com/jlowe)
  - Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/11940
---
 .../java/ai/rapids/cudf/RmmEventHandler.java  | 23 ++++++++++++--
 java/src/main/native/src/RmmJni.cpp           | 31 +++++++++++++++----
 .../src/test/java/ai/rapids/cudf/RmmTest.java | 17 +++++-----
 3 files changed, 56 insertions(+), 15 deletions(-)

diff --git a/java/src/main/java/ai/rapids/cudf/RmmEventHandler.java b/java/src/main/java/ai/rapids/cudf/RmmEventHandler.java
index 85442402403..19707b85bcb 100644
--- a/java/src/main/java/ai/rapids/cudf/RmmEventHandler.java
+++ b/java/src/main/java/ai/rapids/cudf/RmmEventHandler.java
@@ -1,6 +1,6 @@
 /*
  *
- *  Copyright (c) 2020, NVIDIA CORPORATION.
+ *  Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  *  Licensed under the Apache License, Version 2.0 (the "License");
  *  you may not use this file except in compliance with the License.
@@ -22,9 +22,28 @@ public interface RmmEventHandler {
   /**
    * Invoked on a memory allocation failure.
    * @param sizeRequested number of bytes that failed to allocate
+   * @deprecated deprecated in favor of onAllocFailure(long, int)
    * @return true if the memory allocation should be retried or false if it should fail
    */
-  boolean onAllocFailure(long sizeRequested);
+  default boolean onAllocFailure(long sizeRequested) {
+    // this should not be called since it was the previous interface,
+    // and it was abstract before, throwing by default for good measure.
+    throw new UnsupportedOperationException(
+        "Unexpected invocation of deprecated onAllocFailure without retry count.");
+  }
+
+  /**
+   * Invoked on a memory allocation failure.
+   * @param sizeRequested number of bytes that failed to allocate
+   * @param retryCount number of times this allocation has been retried after failure
+   * @return true if the memory allocation should be retried or false if it should fail
+   */
+  default boolean onAllocFailure(long sizeRequested, int retryCount) {
+    // newer code should override this implementation of `onAllocFailure` to handle
+    // `retryCount`. Otherwise, we call the prior implementation to not
+    // break existing code.
+    return onAllocFailure(sizeRequested);
+  }
 
   /**
    * Get the memory thresholds that will trigger {@link #onAllocThreshold(long)}
diff --git a/java/src/main/native/src/RmmJni.cpp b/java/src/main/native/src/RmmJni.cpp
index ce3e6ffb285..2b4c5ae59f5 100644
--- a/java/src/main/native/src/RmmJni.cpp
+++ b/java/src/main/native/src/RmmJni.cpp
@@ -150,9 +150,15 @@ class java_event_handler_memory_resource final : public device_memory_resource {
     if (cls == nullptr) {
       throw cudf::jni::jni_exception("class not found");
     }
-    on_alloc_fail_method = env->GetMethodID(cls, "onAllocFailure", "(J)Z");
+    on_alloc_fail_method = env->GetMethodID(cls, "onAllocFailure", "(JI)Z");
     if (on_alloc_fail_method == nullptr) {
-      throw cudf::jni::jni_exception("onAllocFailure method");
+      use_old_alloc_fail_interface = true;
+      on_alloc_fail_method = env->GetMethodID(cls, "onAllocFailure", "(J)Z");
+      if (on_alloc_fail_method == nullptr) {
+        throw cudf::jni::jni_exception("onAllocFailure method");
+      }
+    } else {
+      use_old_alloc_fail_interface = false;
     }
     on_alloc_threshold_method = env->GetMethodID(cls, "onAllocThreshold", "(J)V");
     if (on_alloc_threshold_method == nullptr) {
@@ -190,6 +196,7 @@ class java_event_handler_memory_resource final : public device_memory_resource {
   JavaVM *jvm;
   jobject handler_obj;
   jmethodID on_alloc_fail_method;
+  bool use_old_alloc_fail_interface;
   jmethodID on_alloc_threshold_method;
   jmethodID on_dealloc_threshold_method;
 
@@ -209,10 +216,18 @@ class java_event_handler_memory_resource final : public device_memory_resource {
     }
   }
 
-  bool on_alloc_fail(std::size_t num_bytes) {
+  bool on_alloc_fail(std::size_t num_bytes, int retry_count) {
     JNIEnv *env = cudf::jni::get_jni_env(jvm);
-    jboolean result =
-        env->CallBooleanMethod(handler_obj, on_alloc_fail_method, static_cast<jlong>(num_bytes));
+    jboolean result = false;
+    if (!use_old_alloc_fail_interface) {
+      result =
+          env->CallBooleanMethod(handler_obj, on_alloc_fail_method, static_cast<jlong>(num_bytes),
+                                 static_cast<jint>(retry_count));
+
+    } else {
+      result =
+          env->CallBooleanMethod(handler_obj, on_alloc_fail_method, static_cast<jlong>(num_bytes));
+    }
     if (env->ExceptionCheck()) {
       throw std::runtime_error("onAllocFailure handler threw an exception");
     }
@@ -240,13 +255,17 @@ class java_event_handler_memory_resource final : public device_memory_resource {
   void *do_allocate(std::size_t num_bytes, rmm::cuda_stream_view stream) override {
     std::size_t total_before;
     void *result;
+    // a non-zero retry_count signifies that the `on_alloc_fail`
+    // callback is being invoked while re-attempting an allocation
+    // that had previously failed.
+    int retry_count = 0;
     while (true) {
       try {
         total_before = get_total_bytes_allocated();
         result = resource->allocate(num_bytes, stream);
         break;
       } catch (rmm::out_of_memory const &e) {
-        if (!on_alloc_fail(num_bytes)) {
+        if (!on_alloc_fail(num_bytes, retry_count++)) {
           throw;
         }
       }
diff --git a/java/src/test/java/ai/rapids/cudf/RmmTest.java b/java/src/test/java/ai/rapids/cudf/RmmTest.java
index c56b131de86..09fbedd8a1c 100644
--- a/java/src/test/java/ai/rapids/cudf/RmmTest.java
+++ b/java/src/test/java/ai/rapids/cudf/RmmTest.java
@@ -73,11 +73,13 @@ public void testTotalAllocated(int rmmAllocMode) {
   public void testEventHandler(int rmmAllocMode) {
     AtomicInteger invokedCount = new AtomicInteger();
     AtomicLong amountRequested = new AtomicLong();
+    AtomicInteger timesRetried = new AtomicInteger();
 
     RmmEventHandler handler = new BaseRmmEventHandler() {
       @Override
-      public boolean onAllocFailure(long sizeRequested) {
+      public boolean onAllocFailure(long sizeRequested, int retryCount) {
         int count = invokedCount.incrementAndGet();
+        timesRetried.set(retryCount);
         amountRequested.set(sizeRequested);
         return count != 3;
       }
@@ -100,6 +102,7 @@ public boolean onAllocFailure(long sizeRequested) {
     }
 
     assertEquals(3, invokedCount.get());
+    assertEquals(2, timesRetried.get());
     assertEquals(requested, amountRequested.get());
 
     // verify after a failure we can still allocate something more reasonable
@@ -114,7 +117,7 @@ public void testSetEventHandlerTwice() {
     // installing an event handler the first time should not be an error
     Rmm.setEventHandler(new BaseRmmEventHandler() {
       @Override
-      public boolean onAllocFailure(long sizeRequested) {
+      public boolean onAllocFailure(long sizeRequested, int retryCount) {
         return false;
       }
     });
@@ -122,7 +125,7 @@ public boolean onAllocFailure(long sizeRequested) {
     // installing a second event handler is an error
     RmmEventHandler otherHandler = new BaseRmmEventHandler() {
       @Override
-      public boolean onAllocFailure(long sizeRequested) {
+      public boolean onAllocFailure(long sizeRequested, int retryCount) {
         return true;
       }
     };
@@ -138,7 +141,7 @@ public void testClearEventHandler() {
     // create an event handler that will always retry
     RmmEventHandler retryHandler = new BaseRmmEventHandler() {
       @Override
-      public boolean onAllocFailure(long sizeRequested) {
+      public boolean onAllocFailure(long sizeRequested, int retryCount) {
         return true;
       }
     };
@@ -165,7 +168,7 @@ public void testAllocOnlyThresholds() {
 
     RmmEventHandler handler = new RmmEventHandler() {
       @Override
-      public boolean onAllocFailure(long sizeRequested) {
+      public boolean onAllocFailure(long sizeRequested, int retryCount) {
         return false;
       }
 
@@ -228,7 +231,7 @@ public void testThresholds() {
 
     RmmEventHandler handler = new RmmEventHandler() {
       @Override
-      public boolean onAllocFailure(long sizeRequested) {
+      public boolean onAllocFailure(long sizeRequested, int retryCount) {
         return false;
       }
 
@@ -308,7 +311,7 @@ public void testExceptionHandling() {
 
     RmmEventHandler handler = new RmmEventHandler() {
       @Override
-      public boolean onAllocFailure(long sizeRequested) {
+      public boolean onAllocFailure(long sizeRequested, int retryCount) {
         throw new AllocFailException();
       }
 

From 08e4ec2a64050c8e70b052c4ccf5f59073c77c8c Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Wed, 19 Oct 2022 14:55:54 -0400
Subject: [PATCH 048/202] Refactor pad/zfill functions for reuse with strings
 udf (#11914)

Refactors the main device code used for `cudf::strings::pad` and `cudf::strings::zfill` for reuse in strings UDF pad and zfill functions. No new functions or features have been added, updated, or removed. The detail functions have mainly just been moved to the new file `cpp/include/cudf/strings/detail/pad_impl.cuh`.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Tobias Ribizel (https://github.com/upsj)

URL: https://github.com/rapidsai/cudf/pull/11914
---
 cpp/include/cudf/strings/detail/pad_impl.cuh | 126 +++++++++++++++++++
 cpp/src/strings/padding.cu                   |  63 +++-------
 2 files changed, 144 insertions(+), 45 deletions(-)
 create mode 100644 cpp/include/cudf/strings/detail/pad_impl.cuh

diff --git a/cpp/include/cudf/strings/detail/pad_impl.cuh b/cpp/include/cudf/strings/detail/pad_impl.cuh
new file mode 100644
index 00000000000..648c240bfbc
--- /dev/null
+++ b/cpp/include/cudf/strings/detail/pad_impl.cuh
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <cudf/strings/detail/utf8.hpp>
+#include <cudf/strings/detail/utilities.cuh>
+#include <cudf/strings/side_type.hpp>
+#include <cudf/strings/string_view.cuh>
+
+namespace cudf {
+namespace strings {
+namespace detail {
+
+/**
+ * @brief Return the size in bytes of padding d_str to width characters using a fill character
+ * with byte length of fill_char_size
+ *
+ * Pad does not perform truncation. That is, If `d_str.length() > width` then `d_str.size_bytes()`
+ * is returned.
+ *
+ * @param d_str String to pad
+ * @param width Number of characters for the padded string result
+ * @param fill_char_size Size of the fill character in bytes
+ * @return The number of bytes required for the pad
+ */
+__device__ size_type compute_padded_size(string_view d_str,
+                                         size_type width,
+                                         size_type fill_char_size)
+{
+  auto const length = d_str.length();
+  auto bytes        = d_str.size_bytes();
+  if (width > length)                            // no truncating;
+    bytes += fill_char_size * (width - length);  // add padding
+  return bytes;
+}
+
+/**
+ * @brief Pad d_str with fill_char into output up to width characters
+ *
+ * Pad does not perform truncation. That is, If `d_str.length() > width` then
+ * then d_str is copied into output.
+ *
+ * @tparam side Specifies where fill_char is added to d_str
+ * @param d_str String to pad
+ * @param width Number of characters for the padded string result
+ * @param fill_char Character used to pad d_str
+ * @param output Device memory to copy the padded string into
+ */
+template <side_type side = side_type::RIGHT>
+__device__ void pad_impl(cudf::string_view d_str,
+                         cudf::size_type width,
+                         cudf::char_utf8 fill_char,
+                         char* output)
+{
+  auto length = d_str.length();
+  if constexpr (side == side_type::LEFT) {
+    while (length++ < width) {
+      output += from_char_utf8(fill_char, output);
+    }
+    copy_string(output, d_str);
+  }
+  if constexpr (side == side_type::RIGHT) {
+    output = copy_string(output, d_str);
+    while (length++ < width) {
+      output += from_char_utf8(fill_char, output);
+    }
+  }
+  if constexpr (side == side_type::BOTH) {
+    auto const pad_size = width - length;
+    // an odd width will right-justify
+    auto right_pad = (width % 2) ? pad_size / 2 : (pad_size - pad_size / 2);
+    auto left_pad  = pad_size - right_pad;  // e.g. width=7: "++foxx+"; width=6: "+fox++"
+    while (left_pad-- > 0) {
+      output += from_char_utf8(fill_char, output);
+    }
+    output = copy_string(output, d_str);
+    while (right_pad-- > 0) {
+      output += from_char_utf8(fill_char, output);
+    }
+  }
+}
+
+/**
+ * @brief Prepend d_str with '0' into output up to width characters
+ *
+ * Pad does not perform truncation. That is, If `d_str.length() > width` then
+ * then d_str is copied into output.
+ *
+ * If d_str starts with a sign character ('-' or '+') then '0' padding
+ * starts after the sign.
+ *
+ * @param d_str String to pad
+ * @param width Number of characters for the padded string result
+ * @param output Device memory to copy the padded string into
+ */
+__device__ void zfill_impl(cudf::string_view d_str, cudf::size_type width, char* output)
+{
+  auto length = d_str.length();
+  auto in_ptr = d_str.data();
+  // if the string starts with a sign, output the sign first
+  if (!d_str.empty() && (*in_ptr == '-' || *in_ptr == '+')) {
+    *output++ = *in_ptr++;
+    d_str     = cudf::string_view{in_ptr, d_str.size_bytes() - 1};
+  }
+  while (length++ < width)
+    *output++ = '0';  // prepend zero char
+  copy_string(output, d_str);
+}
+
+}  // namespace detail
+}  // namespace strings
+}  // namespace cudf
diff --git a/cpp/src/strings/padding.cu b/cpp/src/strings/padding.cu
index e601eeb6b6e..e4002525af9 100644
--- a/cpp/src/strings/padding.cu
+++ b/cpp/src/strings/padding.cu
@@ -20,8 +20,7 @@
 #include <cudf/detail/iterator.cuh>
 #include <cudf/detail/null_mask.hpp>
 #include <cudf/detail/nvtx/ranges.hpp>
-#include <cudf/strings/detail/utilities.cuh>
-#include <cudf/strings/detail/utilities.hpp>
+#include <cudf/strings/detail/pad_impl.cuh>
 #include <cudf/strings/padding.hpp>
 #include <cudf/strings/string_view.cuh>
 #include <cudf/strings/strings_column_view.hpp>
@@ -38,6 +37,7 @@ namespace cudf {
 namespace strings {
 namespace detail {
 namespace {
+
 struct compute_pad_output_length_fn {
   column_device_view d_strings;
   size_type width;
@@ -47,11 +47,7 @@ struct compute_pad_output_length_fn {
   {
     if (d_strings.is_null(idx)) return 0;
     string_view d_str = d_strings.element<string_view>(idx);
-    size_type bytes   = d_str.size_bytes();
-    size_type length  = d_str.length();
-    if (width > length)                            // no truncating
-      bytes += fill_char_size * (width - length);  // add padding
-    return bytes;
+    return compute_padded_size(d_str, width, fill_char_size);
   }
 };
 
@@ -96,13 +92,10 @@ std::unique_ptr<column> pad(
       thrust::make_counting_iterator<cudf::size_type>(0),
       strings_count,
       [d_strings, width, d_fill_char, d_offsets, d_chars] __device__(size_type idx) {
-        if (d_strings.is_null(idx)) return;
-        string_view d_str = d_strings.element<string_view>(idx);
-        auto length       = d_str.length();
-        char* ptr         = d_chars + d_offsets[idx];
-        while (length++ < width)
-          ptr += from_char_utf8(d_fill_char, ptr);
-        copy_string(ptr, d_str);
+        if (d_strings.is_valid(idx)) {
+          pad_impl<side_type::LEFT>(
+            d_strings.element<string_view>(idx), width, d_fill_char, d_chars + d_offsets[idx]);
+        }
       });
   } else if (side == side_type::RIGHT) {
     thrust::for_each_n(
@@ -110,13 +103,10 @@ std::unique_ptr<column> pad(
       thrust::make_counting_iterator<cudf::size_type>(0),
       strings_count,
       [d_strings, width, d_fill_char, d_offsets, d_chars] __device__(size_type idx) {
-        if (d_strings.is_null(idx)) return;
-        string_view d_str = d_strings.element<string_view>(idx);
-        auto length       = d_str.length();
-        char* ptr         = d_chars + d_offsets[idx];
-        ptr               = copy_string(ptr, d_str);
-        while (length++ < width)
-          ptr += from_char_utf8(d_fill_char, ptr);
+        if (d_strings.is_valid(idx)) {
+          pad_impl<side_type::RIGHT>(
+            d_strings.element<string_view>(idx), width, d_fill_char, d_chars + d_offsets[idx]);
+        }
       });
   } else if (side == side_type::BOTH) {
     thrust::for_each_n(
@@ -124,18 +114,10 @@ std::unique_ptr<column> pad(
       thrust::make_counting_iterator<cudf::size_type>(0),
       strings_count,
       [d_strings, width, d_fill_char, d_offsets, d_chars] __device__(size_type idx) {
-        if (d_strings.is_null(idx)) return;
-        string_view d_str = d_strings.element<string_view>(idx);
-        char* ptr         = d_chars + d_offsets[idx];
-        auto pad          = static_cast<int32_t>(width - d_str.length());
-        auto right_pad    = (width & 1) ? pad / 2 : (pad - pad / 2);  // odd width = right-justify
-        auto left_pad =
-          pad - right_pad;  // e.g. width=7 gives "++foxx+" while width=6 gives "+fox++"
-        while (left_pad-- > 0)
-          ptr += from_char_utf8(d_fill_char, ptr);
-        ptr = copy_string(ptr, d_str);
-        while (right_pad-- > 0)
-          ptr += from_char_utf8(d_fill_char, ptr);
+        if (d_strings.is_valid(idx)) {
+          pad_impl<side_type::BOTH>(
+            d_strings.element<string_view>(idx), width, d_fill_char, d_chars + d_offsets[idx]);
+        }
       });
   }
 
@@ -174,19 +156,10 @@ std::unique_ptr<column> zfill(
                      thrust::make_counting_iterator<cudf::size_type>(0),
                      input.size(),
                      [d_strings, width, d_offsets, d_chars] __device__(size_type idx) {
-                       if (d_strings.is_null(idx)) return;
-                       auto d_str   = d_strings.element<string_view>(idx);
-                       auto length  = d_str.length();
-                       auto in_ptr  = d_str.data();
-                       auto out_ptr = d_chars + d_offsets[idx];
-                       // if the string starts with a sign, output the sign first
-                       if (!d_str.empty() && (*in_ptr == '-' || *in_ptr == '+')) {
-                         *out_ptr++ = *in_ptr++;
-                         d_str      = string_view{in_ptr, d_str.size_bytes() - 1};
+                       if (d_strings.is_valid(idx)) {
+                         zfill_impl(
+                           d_strings.element<string_view>(idx), width, d_chars + d_offsets[idx]);
                        }
-                       while (length++ < width)
-                         *out_ptr++ = '0';  // prepend zero char
-                       copy_string(out_ptr, d_str);
                      });
 
   return make_strings_column(input.size(),

From 08ffeccca565aff25f7ca0e718bde8de99dffd35 Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Wed, 19 Oct 2022 16:12:10 -0400
Subject: [PATCH 049/202] Fix some gtests incorrectly coded in namespace
 cudf::test (part I) (#11917)

Fixes a few simple gtests that may not get touched in the course of other PRs.
This removes the `using namespace cudf::test` or similar declaration from gtests where it is improperly used.
No code logic has changed just variable declarations and function calls.

Reference #11734

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Tobias Ribizel (https://github.com/upsj)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/11917
---
 cpp/tests/bitmask/is_element_valid_tests.cpp |  43 +--
 cpp/tests/hashing/hash_test.cpp              | 377 ++++++++++---------
 cpp/tests/interop/dlpack_test.cpp            |  68 ++--
 cpp/tests/lists/explode_tests.cpp            |  87 +++--
 cpp/tests/reshape/byte_cast_tests.cpp        | 181 ++++-----
 cpp/tests/reshape/tile_tests.cpp             |  23 +-
 6 files changed, 401 insertions(+), 378 deletions(-)

diff --git a/cpp/tests/bitmask/is_element_valid_tests.cpp b/cpp/tests/bitmask/is_element_valid_tests.cpp
index 383448c0dd8..888d0103f03 100644
--- a/cpp/tests/bitmask/is_element_valid_tests.cpp
+++ b/cpp/tests/bitmask/is_element_valid_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -23,15 +23,12 @@
 
 #include <thrust/iterator/counting_iterator.h>
 
-namespace cudf {
-namespace test {
-
-struct IsElementValidTest : public BaseFixture {
+struct IsElementValidTest : public cudf::test::BaseFixture {
 };
 
 TEST_F(IsElementValidTest, IsElementValidBasic)
 {
-  fixed_width_column_wrapper<int32_t> col({1, 1, 1, 1, 1}, {1, 0, 0, 0, 1});
+  cudf::test::fixed_width_column_wrapper<int32_t> col({1, 1, 1, 1, 1}, {1, 0, 0, 0, 1});
   EXPECT_TRUE(cudf::detail::is_element_valid_sync(col, 0));
   EXPECT_FALSE(cudf::detail::is_element_valid_sync(col, 1));
   EXPECT_FALSE(cudf::detail::is_element_valid_sync(col, 2));
@@ -41,12 +38,12 @@ TEST_F(IsElementValidTest, IsElementValidBasic)
 
 TEST_F(IsElementValidTest, IsElementValidLarge)
 {
-  auto filter        = [](auto i) { return static_cast<bool>(i % 3); };
-  auto val           = thrust::make_counting_iterator(0);
-  auto valid         = cudf::detail::make_counting_transform_iterator(0, filter);
-  size_type num_rows = 1000;
+  auto filter              = [](auto i) { return static_cast<bool>(i % 3); };
+  auto val                 = thrust::make_counting_iterator(0);
+  auto valid               = cudf::detail::make_counting_transform_iterator(0, filter);
+  cudf::size_type num_rows = 1000;
 
-  fixed_width_column_wrapper<int32_t> col(val, val + num_rows, valid);
+  cudf::test::fixed_width_column_wrapper<int32_t> col(val, val + num_rows, valid);
 
   for (int i = 0; i < num_rows; i++) {
     EXPECT_EQ(cudf::detail::is_element_valid_sync(col, i), filter(i));
@@ -55,16 +52,16 @@ TEST_F(IsElementValidTest, IsElementValidLarge)
 
 TEST_F(IsElementValidTest, IsElementValidOffset)
 {
-  fixed_width_column_wrapper<int32_t> col({1, 1, 1, 1, 1}, {1, 0, 0, 0, 1});
+  cudf::test::fixed_width_column_wrapper<int32_t> col({1, 1, 1, 1, 1}, {1, 0, 0, 0, 1});
   {
-    auto offset_col = slice(col, {1, 5}).front();
+    auto offset_col = cudf::slice(col, {1, 5}).front();
     EXPECT_FALSE(cudf::detail::is_element_valid_sync(offset_col, 0));
     EXPECT_FALSE(cudf::detail::is_element_valid_sync(offset_col, 1));
     EXPECT_FALSE(cudf::detail::is_element_valid_sync(offset_col, 2));
     EXPECT_TRUE(cudf::detail::is_element_valid_sync(offset_col, 3));
   }
   {
-    auto offset_col = slice(col, {2, 5}).front();
+    auto offset_col = cudf::slice(col, {2, 5}).front();
     EXPECT_FALSE(cudf::detail::is_element_valid_sync(offset_col, 0));
     EXPECT_FALSE(cudf::detail::is_element_valid_sync(offset_col, 1));
     EXPECT_TRUE(cudf::detail::is_element_valid_sync(offset_col, 2));
@@ -73,20 +70,16 @@ TEST_F(IsElementValidTest, IsElementValidOffset)
 
 TEST_F(IsElementValidTest, IsElementValidOffsetLarge)
 {
-  auto filter        = [](auto i) { return static_cast<bool>(i % 3); };
-  size_type offset   = 37;
-  auto val           = thrust::make_counting_iterator(0);
-  auto valid         = cudf::detail::make_counting_transform_iterator(0, filter);
-  size_type num_rows = 1000;
+  auto filter              = [](auto i) { return static_cast<bool>(i % 3); };
+  cudf::size_type offset   = 37;
+  auto val                 = thrust::make_counting_iterator(0);
+  auto valid               = cudf::detail::make_counting_transform_iterator(0, filter);
+  cudf::size_type num_rows = 1000;
 
-  fixed_width_column_wrapper<int32_t> col(val, val + num_rows, valid);
-  auto offset_col = slice(col, {offset, num_rows}).front();
+  cudf::test::fixed_width_column_wrapper<int32_t> col(val, val + num_rows, valid);
+  auto offset_col = cudf::slice(col, {offset, num_rows}).front();
 
   for (int i = 0; i < offset_col.size(); i++) {
     EXPECT_EQ(cudf::detail::is_element_valid_sync(offset_col, i), filter(i + offset));
   }
 }
-
-}  // namespace test
-
-}  // namespace cudf
diff --git a/cpp/tests/hashing/hash_test.cpp b/cpp/tests/hashing/hash_test.cpp
index baa7ba07ee4..c1a73761e8d 100644
--- a/cpp/tests/hashing/hash_test.cpp
+++ b/cpp/tests/hashing/hash_test.cpp
@@ -24,37 +24,35 @@
 #include <cudf_test/iterator_utilities.hpp>
 #include <cudf_test/type_lists.hpp>
 
-using cudf::test::fixed_width_column_wrapper;
-using cudf::test::strings_column_wrapper;
-using namespace cudf::test;
-using namespace cudf::test::iterators;
-
-constexpr debug_output_level verbosity{debug_output_level::ALL_ERRORS};
+constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_level::ALL_ERRORS};
 
 class HashTest : public cudf::test::BaseFixture {
 };
 
 TEST_F(HashTest, MultiValue)
 {
-  strings_column_wrapper const strings_col({"",
-                                            "The quick brown fox",
-                                            "jumps over the lazy dog.",
-                                            "All work and no play makes Jack a dull boy",
-                                            R"(!"#$%&'()*+,-./0123456789:;<=>?@[\]^_`{|}~)"});
+  cudf::test::strings_column_wrapper const strings_col(
+    {"",
+     "The quick brown fox",
+     "jumps over the lazy dog.",
+     "All work and no play makes Jack a dull boy",
+     R"(!"#$%&'()*+,-./0123456789:;<=>?@[\]^_`{|}~)"});
 
   using limits = std::numeric_limits<int32_t>;
-  fixed_width_column_wrapper<int32_t> const ints_col({0, 100, -100, limits::min(), limits::max()});
+  cudf::test::fixed_width_column_wrapper<int32_t> const ints_col(
+    {0, 100, -100, limits::min(), limits::max()});
 
   // Different truth values should be equal
-  fixed_width_column_wrapper<bool> const bools_col1({0, 1, 1, 1, 0});
-  fixed_width_column_wrapper<bool> const bools_col2({0, 1, 2, 255, 0});
+  cudf::test::fixed_width_column_wrapper<bool> const bools_col1({0, 1, 1, 1, 0});
+  cudf::test::fixed_width_column_wrapper<bool> const bools_col2({0, 1, 2, 255, 0});
 
   using ts = cudf::timestamp_s;
-  fixed_width_column_wrapper<ts, ts::duration> const secs_col({ts::duration::zero(),
-                                                               static_cast<ts::duration>(100),
-                                                               static_cast<ts::duration>(-100),
-                                                               ts::duration::min(),
-                                                               ts::duration::max()});
+  cudf::test::fixed_width_column_wrapper<ts, ts::duration> const secs_col(
+    {ts::duration::zero(),
+     static_cast<ts::duration>(100),
+     static_cast<ts::duration>(-100),
+     ts::duration::min(),
+     ts::duration::max()});
 
   auto const input1 = cudf::table_view({strings_col, ints_col, bools_col1, secs_col});
   auto const input2 = cudf::table_view({strings_col, ints_col, bools_col2, secs_col});
@@ -69,45 +67,49 @@ TEST_F(HashTest, MultiValue)
 TEST_F(HashTest, MultiValueNulls)
 {
   // Nulls with different values should be equal
-  strings_column_wrapper const strings_col1({"",
-                                             "The quick brown fox",
-                                             "jumps over the lazy dog.",
-                                             "All work and no play makes Jack a dull boy",
-                                             R"(!"#$%&'()*+,-./0123456789:;<=>?@[\]^_`{|}~)"},
-                                            {0, 1, 1, 0, 1});
-  strings_column_wrapper const strings_col2({"different but null",
-                                             "The quick brown fox",
-                                             "jumps over the lazy dog.",
-                                             "I am Jack's complete lack of null value",
-                                             R"(!"#$%&'()*+,-./0123456789:;<=>?@[\]^_`{|}~)"},
-                                            {0, 1, 1, 0, 1});
+  cudf::test::strings_column_wrapper const strings_col1(
+    {"",
+     "The quick brown fox",
+     "jumps over the lazy dog.",
+     "All work and no play makes Jack a dull boy",
+     R"(!"#$%&'()*+,-./0123456789:;<=>?@[\]^_`{|}~)"},
+    {0, 1, 1, 0, 1});
+  cudf::test::strings_column_wrapper const strings_col2(
+    {"different but null",
+     "The quick brown fox",
+     "jumps over the lazy dog.",
+     "I am Jack's complete lack of null value",
+     R"(!"#$%&'()*+,-./0123456789:;<=>?@[\]^_`{|}~)"},
+    {0, 1, 1, 0, 1});
 
   // Nulls with different values should be equal
   using limits = std::numeric_limits<int32_t>;
-  fixed_width_column_wrapper<int32_t> const ints_col1({0, 100, -100, limits::min(), limits::max()},
-                                                      {1, 0, 0, 1, 1});
-  fixed_width_column_wrapper<int32_t> const ints_col2({0, -200, 200, limits::min(), limits::max()},
-                                                      {1, 0, 0, 1, 1});
+  cudf::test::fixed_width_column_wrapper<int32_t> const ints_col1(
+    {0, 100, -100, limits::min(), limits::max()}, {1, 0, 0, 1, 1});
+  cudf::test::fixed_width_column_wrapper<int32_t> const ints_col2(
+    {0, -200, 200, limits::min(), limits::max()}, {1, 0, 0, 1, 1});
 
   // Nulls with different values should be equal
   // Different truth values should be equal
-  fixed_width_column_wrapper<bool> const bools_col1({0, 1, 0, 1, 1}, {1, 1, 0, 0, 1});
-  fixed_width_column_wrapper<bool> const bools_col2({0, 2, 1, 0, 255}, {1, 1, 0, 0, 1});
+  cudf::test::fixed_width_column_wrapper<bool> const bools_col1({0, 1, 0, 1, 1}, {1, 1, 0, 0, 1});
+  cudf::test::fixed_width_column_wrapper<bool> const bools_col2({0, 2, 1, 0, 255}, {1, 1, 0, 0, 1});
 
   // Nulls with different values should be equal
   using ts = cudf::timestamp_s;
-  fixed_width_column_wrapper<ts, ts::duration> const secs_col1({ts::duration::zero(),
-                                                                static_cast<ts::duration>(100),
-                                                                static_cast<ts::duration>(-100),
-                                                                ts::duration::min(),
-                                                                ts::duration::max()},
-                                                               {1, 0, 0, 1, 1});
-  fixed_width_column_wrapper<ts, ts::duration> const secs_col2({ts::duration::zero(),
-                                                                static_cast<ts::duration>(-200),
-                                                                static_cast<ts::duration>(200),
-                                                                ts::duration::min(),
-                                                                ts::duration::max()},
-                                                               {1, 0, 0, 1, 1});
+  cudf::test::fixed_width_column_wrapper<ts, ts::duration> const secs_col1(
+    {ts::duration::zero(),
+     static_cast<ts::duration>(100),
+     static_cast<ts::duration>(-100),
+     ts::duration::min(),
+     ts::duration::max()},
+    {1, 0, 0, 1, 1});
+  cudf::test::fixed_width_column_wrapper<ts, ts::duration> const secs_col2(
+    {ts::duration::zero(),
+     static_cast<ts::duration>(-200),
+     static_cast<ts::duration>(200),
+     ts::duration::min(),
+     ts::duration::max()},
+    {1, 0, 0, 1, 1});
 
   auto const input1 = cudf::table_view({strings_col1, ints_col1, bools_col1, secs_col1});
   auto const input2 = cudf::table_view({strings_col2, ints_col2, bools_col2, secs_col2});
@@ -355,7 +357,7 @@ TYPED_TEST_SUITE(HashTestTyped, cudf::test::FixedWidthTypes);
 
 TYPED_TEST(HashTestTyped, Equality)
 {
-  fixed_width_column_wrapper<TypeParam, int32_t> const col{0, 127, 1, 2, 8};
+  cudf::test::fixed_width_column_wrapper<TypeParam, int32_t> const col{0, 127, 1, 2, 8};
   auto const input = cudf::table_view({col});
 
   // Hash of same input should be equal
@@ -377,8 +379,8 @@ TYPED_TEST(HashTestTyped, EqualityNulls)
   using T = TypeParam;
 
   // Nulls with different values should be equal
-  fixed_width_column_wrapper<T, int32_t> const col1({0, 127, 1, 2, 8}, {0, 1, 1, 1, 1});
-  fixed_width_column_wrapper<T, int32_t> const col2({1, 127, 1, 2, 8}, {0, 1, 1, 1, 1});
+  cudf::test::fixed_width_column_wrapper<T, int32_t> const col1({0, 127, 1, 2, 8}, {0, 1, 1, 1, 1});
+  cudf::test::fixed_width_column_wrapper<T, int32_t> const col2({1, 127, 1, 2, 8}, {0, 1, 1, 1, 1});
 
   auto const input1 = cudf::table_view({col1});
   auto const input2 = cudf::table_view({col2});
@@ -410,10 +412,11 @@ TYPED_TEST(HashTestFloatTyped, TestExtremes)
   T nan   = std::numeric_limits<T>::quiet_NaN();
   T inf   = std::numeric_limits<T>::infinity();
 
-  fixed_width_column_wrapper<T> const col({T(0.0), T(100.0), T(-100.0), min, max, nan, inf, -inf});
-  fixed_width_column_wrapper<T> const col_neg_zero(
+  cudf::test::fixed_width_column_wrapper<T> const col(
+    {T(0.0), T(100.0), T(-100.0), min, max, nan, inf, -inf});
+  cudf::test::fixed_width_column_wrapper<T> const col_neg_zero(
     {T(-0.0), T(100.0), T(-100.0), min, max, nan, inf, -inf});
-  fixed_width_column_wrapper<T> const col_neg_nan(
+  cudf::test::fixed_width_column_wrapper<T> const col_neg_nan(
     {T(0.0), T(100.0), T(-100.0), min, max, -nan, inf, -inf});
 
   auto const table_col          = cudf::table_view({col});
@@ -505,76 +508,77 @@ TEST_F(SparkMurmurHash3Test, MultiValueWithSeeds)
   println(s"combined => ${df.select(hash(col("*"))).collect.mkString(",")}")
   */
 
-  fixed_width_column_wrapper<int32_t> const hash_structs_expected(
+  cudf::test::fixed_width_column_wrapper<int32_t> const hash_structs_expected(
     {-105406170, 90479889, -678041645, 1667387937, 301478567});
-  fixed_width_column_wrapper<int32_t> const hash_strings_expected(
+  cudf::test::fixed_width_column_wrapper<int32_t> const hash_strings_expected(
     {142593372, 1217302703, -715697185, -2061143941, -111635966});
-  fixed_width_column_wrapper<int32_t> const hash_doubles_expected(
+  cudf::test::fixed_width_column_wrapper<int32_t> const hash_doubles_expected(
     {-1670924195, -853646085, -1281358385, 1897734433, -508695674});
-  fixed_width_column_wrapper<int32_t> const hash_timestamps_expected(
+  cudf::test::fixed_width_column_wrapper<int32_t> const hash_timestamps_expected(
     {-1670924195, 1114849490, 904948192, -1832979433, 1752430209});
-  fixed_width_column_wrapper<int32_t> const hash_decimal64_expected(
+  cudf::test::fixed_width_column_wrapper<int32_t> const hash_decimal64_expected(
     {-1670924195, 1114849490, 904948192, 1962370902, -1795328666});
-  fixed_width_column_wrapper<int32_t> const hash_longs_expected(
+  cudf::test::fixed_width_column_wrapper<int32_t> const hash_longs_expected(
     {-1670924195, 1114849490, 904948192, -853646085, -1604625029});
-  fixed_width_column_wrapper<int32_t> const hash_floats_expected(
+  cudf::test::fixed_width_column_wrapper<int32_t> const hash_floats_expected(
     {933211791, 723455942, -349261430, -1225560532, -338752985});
-  fixed_width_column_wrapper<int32_t> const hash_dates_expected(
+  cudf::test::fixed_width_column_wrapper<int32_t> const hash_dates_expected(
     {933211791, 751823303, -1080202046, -1906567553, -1503850410});
-  fixed_width_column_wrapper<int32_t> const hash_decimal32_expected(
+  cudf::test::fixed_width_column_wrapper<int32_t> const hash_decimal32_expected(
     {-1670924195, 1114849490, 904948192, -1454351396, -193774131});
-  fixed_width_column_wrapper<int32_t> const hash_ints_expected(
+  cudf::test::fixed_width_column_wrapper<int32_t> const hash_ints_expected(
     {933211791, 751823303, -1080202046, 723455942, 133916647});
-  fixed_width_column_wrapper<int32_t> const hash_shorts_expected(
+  cudf::test::fixed_width_column_wrapper<int32_t> const hash_shorts_expected(
     {933211791, 751823303, -1080202046, -1871935946, 1249274084});
-  fixed_width_column_wrapper<int32_t> const hash_bytes_expected(
+  cudf::test::fixed_width_column_wrapper<int32_t> const hash_bytes_expected(
     {933211791, 751823303, -1080202046, 1110053733, 1135925485});
-  fixed_width_column_wrapper<int32_t> const hash_bools_expected(
+  cudf::test::fixed_width_column_wrapper<int32_t> const hash_bools_expected(
     {933211791, -559580957, -559580957, -559580957, 933211791});
-  fixed_width_column_wrapper<int32_t> const hash_decimal128_expected(
+  cudf::test::fixed_width_column_wrapper<int32_t> const hash_decimal128_expected(
     {-783713497, -295670906, 1398487324, -52622807, -1359749815});
-  fixed_width_column_wrapper<int32_t> const hash_combined_expected(
+  cudf::test::fixed_width_column_wrapper<int32_t> const hash_combined_expected(
     {401603227, 588162166, 552160517, 1132537411, -326043017});
 
   using double_limits = std::numeric_limits<double>;
   using long_limits   = std::numeric_limits<int64_t>;
   using float_limits  = std::numeric_limits<float>;
   using int_limits    = std::numeric_limits<int32_t>;
-  fixed_width_column_wrapper<int32_t> a_col{0, 100, -100, 0x1234'5678, -0x7654'3210};
-  strings_column_wrapper b_col{"a", "bc", "def", "ghij", "klmno"};
-  fixed_width_column_wrapper<float> x_col{
+  cudf::test::fixed_width_column_wrapper<int32_t> a_col{0, 100, -100, 0x1234'5678, -0x7654'3210};
+  cudf::test::strings_column_wrapper b_col{"a", "bc", "def", "ghij", "klmno"};
+  cudf::test::fixed_width_column_wrapper<float> x_col{
     0.f, 100.f, -100.f, float_limits::infinity(), -float_limits::infinity()};
-  fixed_width_column_wrapper<int64_t> y_col{
+  cudf::test::fixed_width_column_wrapper<int64_t> y_col{
     0L, 100L, -100L, 0x0123'4567'89ab'cdefL, -0x0123'4567'89ab'cdefL};
-  structs_column_wrapper c_col{{x_col, y_col}};
-  structs_column_wrapper const structs_col{{a_col, b_col, c_col}};
-
-  strings_column_wrapper const strings_col({"",
-                                            "The quick brown fox",
-                                            "jumps over the lazy dog.",
-                                            "All work and no play makes Jack a dull boy",
-                                            "!\"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~\ud720\ud721"});
-  fixed_width_column_wrapper<double> const doubles_col(
+  cudf::test::structs_column_wrapper c_col{{x_col, y_col}};
+  cudf::test::structs_column_wrapper const structs_col{{a_col, b_col, c_col}};
+
+  cudf::test::strings_column_wrapper const strings_col(
+    {"",
+     "The quick brown fox",
+     "jumps over the lazy dog.",
+     "All work and no play makes Jack a dull boy",
+     "!\"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~\ud720\ud721"});
+  cudf::test::fixed_width_column_wrapper<double> const doubles_col(
     {0., -0., -double_limits::quiet_NaN(), double_limits::lowest(), double_limits::max()});
-  fixed_width_column_wrapper<cudf::timestamp_ms, cudf::timestamp_ms::rep> const timestamps_col(
-    {0L, 100L, -100L, long_limits::min() / 1000000, long_limits::max() / 1000000});
-  fixed_point_column_wrapper<int64_t> const decimal64_col(
+  cudf::test::fixed_width_column_wrapper<cudf::timestamp_ms, cudf::timestamp_ms::rep> const
+    timestamps_col({0L, 100L, -100L, long_limits::min() / 1000000, long_limits::max() / 1000000});
+  cudf::test::fixed_point_column_wrapper<int64_t> const decimal64_col(
     {0L, 100L, -100L, -999999999999999999L, 999999999999999999L}, numeric::scale_type{-7});
-  fixed_width_column_wrapper<int64_t> const longs_col(
+  cudf::test::fixed_width_column_wrapper<int64_t> const longs_col(
     {0L, 100L, -100L, long_limits::min(), long_limits::max()});
-  fixed_width_column_wrapper<float> const floats_col(
+  cudf::test::fixed_width_column_wrapper<float> const floats_col(
     {0.f, -0.f, -float_limits::quiet_NaN(), float_limits::lowest(), float_limits::max()});
-  fixed_width_column_wrapper<cudf::timestamp_D, cudf::timestamp_D::rep> dates_col(
+  cudf::test::fixed_width_column_wrapper<cudf::timestamp_D, cudf::timestamp_D::rep> dates_col(
     {0, 100, -100, int_limits::min() / 100, int_limits::max() / 100});
-  fixed_point_column_wrapper<int32_t> const decimal32_col({0, 100, -100, -999999999, 999999999},
-                                                          numeric::scale_type{-3});
-  fixed_width_column_wrapper<int32_t> const ints_col(
+  cudf::test::fixed_point_column_wrapper<int32_t> const decimal32_col(
+    {0, 100, -100, -999999999, 999999999}, numeric::scale_type{-3});
+  cudf::test::fixed_width_column_wrapper<int32_t> const ints_col(
     {0, 100, -100, int_limits::min(), int_limits::max()});
-  fixed_width_column_wrapper<int16_t> const shorts_col({0, 100, -100, -32768, 32767});
-  fixed_width_column_wrapper<int8_t> const bytes_col({0, 100, -100, -128, 127});
-  fixed_width_column_wrapper<bool> const bools_col1({0, 1, 1, 1, 0});
-  fixed_width_column_wrapper<bool> const bools_col2({0, 1, 2, 255, 0});
-  fixed_point_column_wrapper<__int128_t> const decimal128_col(
+  cudf::test::fixed_width_column_wrapper<int16_t> const shorts_col({0, 100, -100, -32768, 32767});
+  cudf::test::fixed_width_column_wrapper<int8_t> const bytes_col({0, 100, -100, -128, 127});
+  cudf::test::fixed_width_column_wrapper<bool> const bools_col1({0, 1, 1, 1, 0});
+  cudf::test::fixed_width_column_wrapper<bool> const bools_col2({0, 1, 2, 255, 0});
+  cudf::test::fixed_point_column_wrapper<__int128_t> const decimal128_col(
     {static_cast<__int128>(0),
      static_cast<__int128>(100),
      static_cast<__int128>(-1),
@@ -644,14 +648,15 @@ TEST_F(SparkMurmurHash3Test, StringsWithSeed)
   //   .map(org.apache.spark.sql.catalyst.expressions.Murmur3HashFunction.hash(
   //     _, org.apache.spark.sql.types.StringType, 314)))
 
-  fixed_width_column_wrapper<int32_t> const hash_strings_expected_seed_314(
+  cudf::test::fixed_width_column_wrapper<int32_t> const hash_strings_expected_seed_314(
     {1467149710, 723257560, -1620282500, -2001858707, 1588473657});
 
-  strings_column_wrapper const strings_col({"",
-                                            "The quick brown fox",
-                                            "jumps over the lazy dog.",
-                                            "All work and no play makes Jack a dull boy",
-                                            "!\"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~\ud720\ud721"});
+  cudf::test::strings_column_wrapper const strings_col(
+    {"",
+     "The quick brown fox",
+     "jumps over the lazy dog.",
+     "All work and no play makes Jack a dull boy",
+     "!\"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~\ud720\ud721"});
 
   constexpr auto hasher   = cudf::hash_id::HASH_SPARK_MURMUR3;
   auto const hash_strings = cudf::hash(cudf::table_view({strings_col}), hasher, 314);
@@ -691,27 +696,28 @@ TEST_F(SparkMurmurHash3Test, ListValues)
   df2.show(false)
   */
 
-  auto const null  = -1;
-  auto nested_list = cudf::test::lists_column_wrapper<int>({{},
-                                                            {1},
-                                                            {1, 2},
-                                                            {1, 2, 3},
-                                                            {1, 2},
-                                                            {3},
-                                                            {1},
-                                                            {2, 3},
-                                                            {1},
-                                                            {{null, 2, 3}, nulls_at({0})},
-                                                            {1, 2},
-                                                            {3},
-                                                            {{null}, nulls_at({0})},
-                                                            {1, 2},
-                                                            {},
-                                                            {3}},
-                                                           nulls_at({0, 14}));
+  auto const null = -1;
+  auto nested_list =
+    cudf::test::lists_column_wrapper<int>({{},
+                                           {1},
+                                           {1, 2},
+                                           {1, 2, 3},
+                                           {1, 2},
+                                           {3},
+                                           {1},
+                                           {2, 3},
+                                           {1},
+                                           {{null, 2, 3}, cudf::test::iterators::nulls_at({0})},
+                                           {1, 2},
+                                           {3},
+                                           {{null}, cudf::test::iterators::nulls_at({0})},
+                                           {1, 2},
+                                           {},
+                                           {3}},
+                                          cudf::test::iterators::nulls_at({0, 14}));
   auto offsets =
     cudf::test::fixed_width_column_wrapper<cudf::size_type>{0, 0, 0, 1, 2, 3, 4, 6, 8, 10, 13, 16};
-  auto list_validity        = nulls_at({0});
+  auto list_validity        = cudf::test::iterators::nulls_at({0});
   auto list_validity_buffer = cudf::test::detail::make_null_mask(list_validity, list_validity + 11);
   auto list_column          = cudf::make_lists_column(11,
                                              offsets.release(),
@@ -766,11 +772,18 @@ TEST_F(SparkMurmurHash3Test, StructOfListValues)
   */
 
   auto const null = -1;
-  auto col1       = cudf::test::lists_column_wrapper<int>(
-    {{}, {0}, {{1, null}, nulls_at({1})}, {{1, null}, nulls_at({1})}, {}, {} /*NULL*/, {2, 3}},
-    nulls_at({5}));
+  auto col1 =
+    cudf::test::lists_column_wrapper<int>({{},
+                                           {0},
+                                           {{1, null}, cudf::test::iterators::nulls_at({1})},
+                                           {{1, null}, cudf::test::iterators::nulls_at({1})},
+                                           {},
+                                           {} /*NULL*/,
+                                           {2, 3}},
+                                          cudf::test::iterators::nulls_at({5}));
   auto col2 = cudf::test::lists_column_wrapper<int>(
-    {{}, {0}, {} /*NULL*/, {}, {{null, 1}, nulls_at({0})}, {1}, {4, 5}}, nulls_at({2}));
+    {{}, {0}, {} /*NULL*/, {}, {{null, 1}, cudf::test::iterators::nulls_at({0})}, {1}, {4, 5}},
+    cudf::test::iterators::nulls_at({2}));
   auto struct_column = cudf::test::structs_column_wrapper{{col1, col2}};
 
   auto expect = cudf::test::fixed_width_column_wrapper<int32_t>{
@@ -813,12 +826,15 @@ TEST_F(SparkMurmurHash3Test, ListOfStructValues)
   */
 
   auto const null = -1;
-  auto col1 = fixed_width_column_wrapper<int32_t>({0, null, null, 1, null, null, 2, 2, null, 2, 4},
-                                                  nulls_at({1, 2, 4, 5, 8}));
-  auto col2 = fixed_width_column_wrapper<int32_t>({0, null, null, null, 1, 1, 3, 3, null, 3, 5},
-                                                  nulls_at({1, 2, 3, 8}));
-  auto struct_column = structs_column_wrapper{{col1, col2}, {1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1}};
-  auto offsets       = fixed_width_column_wrapper<cudf::size_type>{0, 1, 2, 3, 4, 5, 7, 9, 11};
+  auto col1       = cudf::test::fixed_width_column_wrapper<int32_t>(
+    {0, null, null, 1, null, null, 2, 2, null, 2, 4},
+    cudf::test::iterators::nulls_at({1, 2, 4, 5, 8}));
+  auto col2 = cudf::test::fixed_width_column_wrapper<int32_t>(
+    {0, null, null, null, 1, 1, 3, 3, null, 3, 5}, cudf::test::iterators::nulls_at({1, 2, 3, 8}));
+  auto struct_column =
+    cudf::test::structs_column_wrapper{{col1, col2}, {1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1}};
+  auto offsets =
+    cudf::test::fixed_width_column_wrapper<cudf::size_type>{0, 1, 2, 3, 4, 5, 7, 9, 11};
   auto list_nullmask = std::vector<bool>(1, 8);
   auto list_validity_buffer =
     cudf::test::detail::make_null_mask(list_nullmask.begin(), list_nullmask.end());
@@ -847,7 +863,7 @@ class MD5HashTest : public cudf::test::BaseFixture {
 
 TEST_F(MD5HashTest, MultiValue)
 {
-  strings_column_wrapper const strings_col(
+  cudf::test::strings_column_wrapper const strings_col(
     {"",
      "A 60 character string to test MD5's message padding algorithm",
      "A very long (greater than 128 bytes/char string) to test a multi hash-step data point in the "
@@ -855,24 +871,27 @@ TEST_F(MD5HashTest, MultiValue)
      "All work and no play makes Jack a dull boy",
      R"(!"#$%&'()*+,-./0123456789:;<=>?@[\]^_`{|}~)"});
 
-  strings_column_wrapper const md5_string_results1({"d41d8cd98f00b204e9800998ecf8427e",
-                                                    "682240021651ae166d08fe2a014d5c09",
-                                                    "3669d5225fddbb34676312ca3b78bbd9",
-                                                    "c61a4185135eda043f35e92c3505e180",
-                                                    "52da74c75cb6575d25be29e66bd0adde"});
+  cudf::test::strings_column_wrapper const md5_string_results1(
+    {"d41d8cd98f00b204e9800998ecf8427e",
+     "682240021651ae166d08fe2a014d5c09",
+     "3669d5225fddbb34676312ca3b78bbd9",
+     "c61a4185135eda043f35e92c3505e180",
+     "52da74c75cb6575d25be29e66bd0adde"});
 
-  strings_column_wrapper const md5_string_results2({"d41d8cd98f00b204e9800998ecf8427e",
-                                                    "e5a5682e82278e78dbaad9a689df7a73",
-                                                    "4121ab1bb6e84172fd94822645862ae9",
-                                                    "28970886501efe20164213855afe5850",
-                                                    "6bc1b872103cc6a02d882245b8516e2e"});
+  cudf::test::strings_column_wrapper const md5_string_results2(
+    {"d41d8cd98f00b204e9800998ecf8427e",
+     "e5a5682e82278e78dbaad9a689df7a73",
+     "4121ab1bb6e84172fd94822645862ae9",
+     "28970886501efe20164213855afe5850",
+     "6bc1b872103cc6a02d882245b8516e2e"});
 
   using limits = std::numeric_limits<int32_t>;
-  fixed_width_column_wrapper<int32_t> const ints_col({0, 100, -100, limits::min(), limits::max()});
+  cudf::test::fixed_width_column_wrapper<int32_t> const ints_col(
+    {0, 100, -100, limits::min(), limits::max()});
 
   // Different truth values should be equal
-  fixed_width_column_wrapper<bool> const bools_col1({0, 1, 1, 1, 0});
-  fixed_width_column_wrapper<bool> const bools_col2({0, 1, 2, 255, 0});
+  cudf::test::fixed_width_column_wrapper<bool> const bools_col1({0, 1, 1, 1, 0});
+  cudf::test::fixed_width_column_wrapper<bool> const bools_col2({0, 1, 2, 255, 0});
 
   auto const string_input1      = cudf::table_view({strings_col});
   auto const string_input2      = cudf::table_view({strings_col, strings_col});
@@ -894,7 +913,7 @@ TEST_F(MD5HashTest, MultiValue)
 TEST_F(MD5HashTest, MultiValueNulls)
 {
   // Nulls with different values should be equal
-  strings_column_wrapper const strings_col1(
+  cudf::test::strings_column_wrapper const strings_col1(
     {"",
      "Different but null!",
      "A very long (greater than 128 bytes/char string) to test a multi hash-step data point in the "
@@ -902,7 +921,7 @@ TEST_F(MD5HashTest, MultiValueNulls)
      "All work and no play makes Jack a dull boy",
      R"(!"#$%&'()*+,-./0123456789:;<=>?@[\]^_`{|}~)"},
     {1, 0, 0, 1, 0});
-  strings_column_wrapper const strings_col2(
+  cudf::test::strings_column_wrapper const strings_col2(
     {"",
      "A 60 character string to test MD5's message padding algorithm",
      "Very different... but null",
@@ -912,15 +931,15 @@ TEST_F(MD5HashTest, MultiValueNulls)
 
   // Nulls with different values should be equal
   using limits = std::numeric_limits<int32_t>;
-  fixed_width_column_wrapper<int32_t> const ints_col1({0, 100, -100, limits::min(), limits::max()},
-                                                      {1, 0, 0, 1, 1});
-  fixed_width_column_wrapper<int32_t> const ints_col2({0, -200, 200, limits::min(), limits::max()},
-                                                      {1, 0, 0, 1, 1});
+  cudf::test::fixed_width_column_wrapper<int32_t> const ints_col1(
+    {0, 100, -100, limits::min(), limits::max()}, {1, 0, 0, 1, 1});
+  cudf::test::fixed_width_column_wrapper<int32_t> const ints_col2(
+    {0, -200, 200, limits::min(), limits::max()}, {1, 0, 0, 1, 1});
 
   // Nulls with different values should be equal
   // Different truth values should be equal
-  fixed_width_column_wrapper<bool> const bools_col1({0, 1, 0, 1, 1}, {1, 1, 0, 0, 1});
-  fixed_width_column_wrapper<bool> const bools_col2({0, 2, 1, 0, 255}, {1, 1, 0, 0, 1});
+  cudf::test::fixed_width_column_wrapper<bool> const bools_col1({0, 1, 0, 1, 1}, {1, 1, 0, 0, 1});
+  cudf::test::fixed_width_column_wrapper<bool> const bools_col2({0, 2, 1, 0, 255}, {1, 1, 0, 0, 1});
 
   auto const input1 = cudf::table_view({strings_col1, ints_col1, bools_col1});
   auto const input2 = cudf::table_view({strings_col2, ints_col2, bools_col2});
@@ -936,7 +955,7 @@ TEST_F(MD5HashTest, StringListsNulls)
 {
   auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 0; });
 
-  strings_column_wrapper const strings_col(
+  cudf::test::strings_column_wrapper const strings_col(
     {"",
      "A 60 character string to test MD5's message padding algorithm",
      "A very long (greater than 128 bytes/char string) to test a multi hash-step data point in the "
@@ -944,7 +963,7 @@ TEST_F(MD5HashTest, StringListsNulls)
      "All work and no play makes Jack a dull boy",
      R"(!"#$%&'()*+,-./0123456789:;<=>?@[\]^_`{|}~)"});
 
-  lists_column_wrapper<cudf::string_view> strings_list_col(
+  cudf::test::lists_column_wrapper<cudf::string_view> strings_list_col(
     {{""},
      {{"NULL", "A 60 character string to test MD5's message padding algorithm"}, validity},
      {"A very long (greater than 128 bytes/char string) to test a multi hash-step data point in "
@@ -971,7 +990,7 @@ TYPED_TEST_SUITE(MD5HashTestTyped, cudf::test::NumericTypes);
 
 TYPED_TEST(MD5HashTestTyped, Equality)
 {
-  fixed_width_column_wrapper<TypeParam> const col({0, 127, 1, 2, 8});
+  cudf::test::fixed_width_column_wrapper<TypeParam> const col({0, 127, 1, 2, 8});
   auto const input = cudf::table_view({col});
 
   // Hash of same input should be equal
@@ -987,8 +1006,8 @@ TYPED_TEST(MD5HashTestTyped, EqualityNulls)
   using T = TypeParam;
 
   // Nulls with different values should be equal
-  fixed_width_column_wrapper<T> const col1({0, 127, 1, 2, 8}, {0, 1, 1, 1, 1});
-  fixed_width_column_wrapper<T> const col2({1, 127, 1, 2, 8}, {0, 1, 1, 1, 1});
+  cudf::test::fixed_width_column_wrapper<T> const col1({0, 127, 1, 2, 8}, {0, 1, 1, 1, 1});
+  cudf::test::fixed_width_column_wrapper<T> const col2({1, 127, 1, 2, 8}, {0, 1, 1, 1, 1});
 
   auto const input1 = cudf::table_view({col1});
   auto const input2 = cudf::table_view({col2});
@@ -1002,15 +1021,15 @@ TYPED_TEST(MD5HashTestTyped, EqualityNulls)
 
 TEST_F(MD5HashTest, TestBoolListsWithNulls)
 {
-  fixed_width_column_wrapper<bool> const col1({0, 255, 255, 16, 27, 18, 100, 1, 2},
-                                              {1, 0, 0, 0, 1, 1, 1, 0, 0});
-  fixed_width_column_wrapper<bool> const col2({0, 255, 255, 32, 81, 68, 3, 101, 4},
-                                              {1, 0, 0, 1, 0, 1, 0, 1, 0});
-  fixed_width_column_wrapper<bool> const col3({0, 255, 255, 64, 49, 42, 5, 6, 102},
-                                              {1, 0, 0, 1, 1, 0, 0, 0, 1});
+  cudf::test::fixed_width_column_wrapper<bool> const col1({0, 255, 255, 16, 27, 18, 100, 1, 2},
+                                                          {1, 0, 0, 0, 1, 1, 1, 0, 0});
+  cudf::test::fixed_width_column_wrapper<bool> const col2({0, 255, 255, 32, 81, 68, 3, 101, 4},
+                                                          {1, 0, 0, 1, 0, 1, 0, 1, 0});
+  cudf::test::fixed_width_column_wrapper<bool> const col3({0, 255, 255, 64, 49, 42, 5, 6, 102},
+                                                          {1, 0, 0, 1, 1, 0, 0, 0, 1});
 
   auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; });
-  lists_column_wrapper<bool> const list_col(
+  cudf::test::lists_column_wrapper<bool> const list_col(
     {{0, 0, 0}, {1}, {}, {{1, 1, 1}, validity}, {1, 1}, {1, 1}, {1}, {1}, {1}}, validity);
 
   auto const input1 = cudf::table_view({col1, col2, col3});
@@ -1027,22 +1046,23 @@ template <typename T>
 class MD5HashListTestTyped : public cudf::test::BaseFixture {
 };
 
-using NumericTypesNoBools = Concat<IntegralTypesNotBool, FloatingPointTypes>;
+using NumericTypesNoBools =
+  cudf::test::Concat<cudf::test::IntegralTypesNotBool, cudf::test::FloatingPointTypes>;
 TYPED_TEST_SUITE(MD5HashListTestTyped, NumericTypesNoBools);
 
 TYPED_TEST(MD5HashListTestTyped, TestListsWithNulls)
 {
   using T = TypeParam;
 
-  fixed_width_column_wrapper<T> const col1({0, 255, 255, 16, 27, 18, 100, 1, 2},
-                                           {1, 0, 0, 0, 1, 1, 1, 0, 0});
-  fixed_width_column_wrapper<T> const col2({0, 255, 255, 32, 81, 68, 3, 101, 4},
-                                           {1, 0, 0, 1, 0, 1, 0, 1, 0});
-  fixed_width_column_wrapper<T> const col3({0, 255, 255, 64, 49, 42, 5, 6, 102},
-                                           {1, 0, 0, 1, 1, 0, 0, 0, 1});
+  cudf::test::fixed_width_column_wrapper<T> const col1({0, 255, 255, 16, 27, 18, 100, 1, 2},
+                                                       {1, 0, 0, 0, 1, 1, 1, 0, 0});
+  cudf::test::fixed_width_column_wrapper<T> const col2({0, 255, 255, 32, 81, 68, 3, 101, 4},
+                                                       {1, 0, 0, 1, 0, 1, 0, 1, 0});
+  cudf::test::fixed_width_column_wrapper<T> const col3({0, 255, 255, 64, 49, 42, 5, 6, 102},
+                                                       {1, 0, 0, 1, 1, 0, 0, 0, 1});
 
   auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 1; });
-  lists_column_wrapper<T> const list_col(
+  cudf::test::lists_column_wrapper<T> const list_col(
     {{0, 0, 0}, {127}, {}, {{32, 127, 64}, validity}, {27, 49}, {18, 68}, {100}, {101}, {102}},
     validity);
 
@@ -1070,8 +1090,9 @@ TYPED_TEST(MD5HashTestFloatTyped, TestExtremes)
   T nan   = std::numeric_limits<T>::quiet_NaN();
   T inf   = std::numeric_limits<T>::infinity();
 
-  fixed_width_column_wrapper<T> const col1({T(0.0), T(100.0), T(-100.0), min, max, nan, inf, -inf});
-  fixed_width_column_wrapper<T> const col2(
+  cudf::test::fixed_width_column_wrapper<T> const col1(
+    {T(0.0), T(100.0), T(-100.0), min, max, nan, inf, -inf});
+  cudf::test::fixed_width_column_wrapper<T> const col2(
     {T(-0.0), T(100.0), T(-100.0), min, max, -nan, inf, -inf});
 
   auto const input1 = cudf::table_view({col1});
@@ -1091,9 +1112,9 @@ TYPED_TEST(MD5HashTestFloatTyped, TestListExtremes)
   T nan   = std::numeric_limits<T>::quiet_NaN();
   T inf   = std::numeric_limits<T>::infinity();
 
-  lists_column_wrapper<T> const col1(
+  cudf::test::lists_column_wrapper<T> const col1(
     {{T(0.0)}, {T(100.0), T(-100.0)}, {min, max, nan}, {inf, -inf}});
-  lists_column_wrapper<T> const col2(
+  cudf::test::lists_column_wrapper<T> const col2(
     {{T(-0.0)}, {T(100.0), T(-100.0)}, {min, max, -nan}, {inf, -inf}});
 
   auto const input1 = cudf::table_view({col1});
diff --git a/cpp/tests/interop/dlpack_test.cpp b/cpp/tests/interop/dlpack_test.cpp
index da9f80cf3d7..2862590d05f 100644
--- a/cpp/tests/interop/dlpack_test.cpp
+++ b/cpp/tests/interop/dlpack_test.cpp
@@ -24,8 +24,6 @@
 
 #include <thrust/host_vector.h>
 
-using namespace cudf::test;
-
 struct dlpack_deleter {
   void operator()(DLManagedTensor* tensor) { tensor->deleter(tensor); }
 };
@@ -61,7 +59,7 @@ void validate_dtype(DLDataType const& dtype)
   EXPECT_EQ(sizeof(T) * 8, dtype.bits);
 }
 
-class DLPackUntypedTests : public BaseFixture {
+class DLPackUntypedTests : public cudf::test::BaseFixture {
 };
 
 TEST_F(DLPackUntypedTests, EmptyTableToDlpack)
@@ -73,8 +71,8 @@ TEST_F(DLPackUntypedTests, EmptyTableToDlpack)
 
 TEST_F(DLPackUntypedTests, EmptyColsToDlpack)
 {
-  fixed_width_column_wrapper<int32_t> col1({});
-  fixed_width_column_wrapper<int32_t> col2({});
+  cudf::test::fixed_width_column_wrapper<int32_t> col1({});
+  cudf::test::fixed_width_column_wrapper<int32_t> col2({});
   cudf::table_view input({col1, col2});
   unique_managed_tensor tensor(cudf::to_dlpack(input));
   validate_dtype<int32_t>(tensor->dl_tensor.dtype);
@@ -97,30 +95,30 @@ TEST_F(DLPackUntypedTests, NullTensorFromDlpack)
 
 TEST_F(DLPackUntypedTests, MultipleTypesToDlpack)
 {
-  fixed_width_column_wrapper<int16_t> col1({1, 2, 3, 4});
-  fixed_width_column_wrapper<int32_t> col2({1, 2, 3, 4});
+  cudf::test::fixed_width_column_wrapper<int16_t> col1({1, 2, 3, 4});
+  cudf::test::fixed_width_column_wrapper<int32_t> col2({1, 2, 3, 4});
   cudf::table_view input({col1, col2});
   EXPECT_THROW(cudf::to_dlpack(input), cudf::logic_error);
 }
 
 TEST_F(DLPackUntypedTests, InvalidNullsToDlpack)
 {
-  fixed_width_column_wrapper<int32_t> col1({1, 2, 3, 4});
-  fixed_width_column_wrapper<int32_t> col2({1, 2, 3, 4}, {1, 0, 1, 1});
+  cudf::test::fixed_width_column_wrapper<int32_t> col1({1, 2, 3, 4});
+  cudf::test::fixed_width_column_wrapper<int32_t> col2({1, 2, 3, 4}, {1, 0, 1, 1});
   cudf::table_view input({col1, col2});
   EXPECT_THROW(cudf::to_dlpack(input), cudf::logic_error);
 }
 
 TEST_F(DLPackUntypedTests, StringTypeToDlpack)
 {
-  strings_column_wrapper col({"foo", "bar", "baz"});
+  cudf::test::strings_column_wrapper col({"foo", "bar", "baz"});
   cudf::table_view input({col});
   EXPECT_THROW(cudf::to_dlpack(input), cudf::logic_error);
 }
 
 TEST_F(DLPackUntypedTests, UnsupportedDeviceTypeFromDlpack)
 {
-  fixed_width_column_wrapper<int32_t> col({1, 2, 3, 4});
+  cudf::test::fixed_width_column_wrapper<int32_t> col({1, 2, 3, 4});
   cudf::table_view input({col});
   unique_managed_tensor tensor(cudf::to_dlpack(input));
 
@@ -131,7 +129,7 @@ TEST_F(DLPackUntypedTests, UnsupportedDeviceTypeFromDlpack)
 
 TEST_F(DLPackUntypedTests, InvalidDeviceIdFromDlpack)
 {
-  fixed_width_column_wrapper<int32_t> col({1, 2, 3, 4});
+  cudf::test::fixed_width_column_wrapper<int32_t> col({1, 2, 3, 4});
   cudf::table_view input({col});
   unique_managed_tensor tensor(cudf::to_dlpack(input));
 
@@ -142,7 +140,7 @@ TEST_F(DLPackUntypedTests, InvalidDeviceIdFromDlpack)
 
 TEST_F(DLPackUntypedTests, UnsupportedDimsFromDlpack)
 {
-  fixed_width_column_wrapper<int32_t> col({1, 2, 3, 4});
+  cudf::test::fixed_width_column_wrapper<int32_t> col({1, 2, 3, 4});
   cudf::table_view input({col});
   unique_managed_tensor tensor(cudf::to_dlpack(input));
 
@@ -153,7 +151,7 @@ TEST_F(DLPackUntypedTests, UnsupportedDimsFromDlpack)
 
 TEST_F(DLPackUntypedTests, TooManyRowsFromDlpack)
 {
-  fixed_width_column_wrapper<int32_t> col({1, 2, 3, 4});
+  cudf::test::fixed_width_column_wrapper<int32_t> col({1, 2, 3, 4});
   cudf::table_view input({col});
   unique_managed_tensor tensor(cudf::to_dlpack(input));
 
@@ -165,8 +163,8 @@ TEST_F(DLPackUntypedTests, TooManyRowsFromDlpack)
 
 TEST_F(DLPackUntypedTests, TooManyColsFromDlpack)
 {
-  fixed_width_column_wrapper<int32_t> col1({1, 2, 3, 4});
-  fixed_width_column_wrapper<int32_t> col2({5, 6, 7, 8});
+  cudf::test::fixed_width_column_wrapper<int32_t> col1({1, 2, 3, 4});
+  cudf::test::fixed_width_column_wrapper<int32_t> col2({5, 6, 7, 8});
   cudf::table_view input({col1, col2});
   unique_managed_tensor tensor(cudf::to_dlpack(input));
 
@@ -178,7 +176,7 @@ TEST_F(DLPackUntypedTests, TooManyColsFromDlpack)
 
 TEST_F(DLPackUntypedTests, InvalidTypeFromDlpack)
 {
-  fixed_width_column_wrapper<int32_t> col({1, 2, 3, 4});
+  cudf::test::fixed_width_column_wrapper<int32_t> col({1, 2, 3, 4});
   cudf::table_view input({col});
   unique_managed_tensor tensor(cudf::to_dlpack(input));
 
@@ -189,7 +187,7 @@ TEST_F(DLPackUntypedTests, InvalidTypeFromDlpack)
 
 TEST_F(DLPackUntypedTests, UnsupportedIntBitsizeFromDlpack)
 {
-  fixed_width_column_wrapper<int32_t> col({1, 2, 3, 4});
+  cudf::test::fixed_width_column_wrapper<int32_t> col({1, 2, 3, 4});
   cudf::table_view input({col});
   unique_managed_tensor tensor(cudf::to_dlpack(input));
 
@@ -200,7 +198,7 @@ TEST_F(DLPackUntypedTests, UnsupportedIntBitsizeFromDlpack)
 
 TEST_F(DLPackUntypedTests, UnsupportedFloatBitsizeFromDlpack)
 {
-  fixed_width_column_wrapper<float> col({1, 2, 3, 4});
+  cudf::test::fixed_width_column_wrapper<float> col({1, 2, 3, 4});
   cudf::table_view input({col});
   unique_managed_tensor tensor(cudf::to_dlpack(input));
 
@@ -211,7 +209,7 @@ TEST_F(DLPackUntypedTests, UnsupportedFloatBitsizeFromDlpack)
 
 TEST_F(DLPackUntypedTests, UnsupportedLanesFromDlpack)
 {
-  fixed_width_column_wrapper<int32_t> col({1, 2, 3, 4});
+  cudf::test::fixed_width_column_wrapper<int32_t> col({1, 2, 3, 4});
   cudf::table_view input({col});
   unique_managed_tensor tensor(cudf::to_dlpack(input));
 
@@ -335,20 +333,20 @@ TEST_F(DLPackUntypedTests, UnsupportedStridedColMajor2DTensorFromDlpack)
 }
 
 template <typename T>
-class DLPackTimestampTests : public BaseFixture {
+class DLPackTimestampTests : public cudf::test::BaseFixture {
 };
 
-TYPED_TEST_SUITE(DLPackTimestampTests, ChronoTypes);
+TYPED_TEST_SUITE(DLPackTimestampTests, cudf::test::ChronoTypes);
 
 TYPED_TEST(DLPackTimestampTests, ChronoTypesToDlpack)
 {
-  fixed_width_column_wrapper<TypeParam, int32_t> col({1, 2, 3, 4});
+  cudf::test::fixed_width_column_wrapper<TypeParam, int32_t> col({1, 2, 3, 4});
   cudf::table_view input({col});
   EXPECT_THROW(cudf::to_dlpack(input), cudf::logic_error);
 }
 
 template <typename T>
-class DLPackNumericTests : public BaseFixture {
+class DLPackNumericTests : public cudf::test::BaseFixture {
 };
 
 // The list of supported types comes from DLDataType_to_data_type() in cpp/src/dlpack/dlpack.cpp
@@ -360,7 +358,7 @@ TYPED_TEST_SUITE(DLPackNumericTests, SupportedTypes);
 TYPED_TEST(DLPackNumericTests, ToDlpack1D)
 {
   // Test nullable column with no nulls
-  fixed_width_column_wrapper<TypeParam> col({1, 2, 3, 4}, {1, 1, 1, 1});
+  cudf::test::fixed_width_column_wrapper<TypeParam> col({1, 2, 3, 4}, {1, 1, 1, 1});
   auto const col_view = static_cast<cudf::column_view>(col);
   EXPECT_FALSE(col_view.has_nulls());
   EXPECT_TRUE(col_view.nullable());
@@ -389,9 +387,11 @@ TYPED_TEST(DLPackNumericTests, ToDlpack2D)
   using T             = TypeParam;
   auto const col1_tmp = cudf::test::make_type_param_vector<T>({1, 2, 3, 4});
   auto const col2_tmp = cudf::test::make_type_param_vector<T>({4, 5, 6, 7});
-  std::vector<fixed_width_column_wrapper<TypeParam>> cols;
-  cols.push_back(fixed_width_column_wrapper<TypeParam>(col1_tmp.cbegin(), col1_tmp.cend()));
-  cols.push_back(fixed_width_column_wrapper<TypeParam>(col2_tmp.cbegin(), col2_tmp.cend()));
+  std::vector<cudf::test::fixed_width_column_wrapper<TypeParam>> cols;
+  cols.push_back(
+    cudf::test::fixed_width_column_wrapper<TypeParam>(col1_tmp.cbegin(), col1_tmp.cend()));
+  cols.push_back(
+    cudf::test::fixed_width_column_wrapper<TypeParam>(col2_tmp.cbegin(), col2_tmp.cend()));
 
   std::vector<cudf::column_view> col_views;
   std::transform(cols.begin(), cols.end(), std::back_inserter(col_views), [](auto const& col) {
@@ -427,7 +427,7 @@ TYPED_TEST(DLPackNumericTests, ToDlpack2D)
 TYPED_TEST(DLPackNumericTests, FromDlpack1D)
 {
   // Use to_dlpack to generate an input tensor
-  fixed_width_column_wrapper<TypeParam> col({1, 2, 3, 4});
+  cudf::test::fixed_width_column_wrapper<TypeParam> col({1, 2, 3, 4});
   cudf::table_view input({col});
   unique_managed_tensor tensor(cudf::to_dlpack(input));
 
@@ -442,9 +442,9 @@ TYPED_TEST(DLPackNumericTests, FromDlpack2D)
   using T         = TypeParam;
   auto const col1 = cudf::test::make_type_param_vector<T>({1, 2, 3, 4});
   auto const col2 = cudf::test::make_type_param_vector<T>({4, 5, 6, 7});
-  std::vector<fixed_width_column_wrapper<TypeParam>> cols;
-  cols.push_back(fixed_width_column_wrapper<T>(col1.cbegin(), col1.cend()));
-  cols.push_back(fixed_width_column_wrapper<T>(col2.cbegin(), col2.cend()));
+  std::vector<cudf::test::fixed_width_column_wrapper<TypeParam>> cols;
+  cols.push_back(cudf::test::fixed_width_column_wrapper<T>(col1.cbegin(), col1.cend()));
+  cols.push_back(cudf::test::fixed_width_column_wrapper<T>(col2.cbegin(), col2.cend()));
 
   std::vector<cudf::column_view> col_views;
   std::transform(cols.begin(), cols.end(), std::back_inserter(col_views), [](auto const& col) {
@@ -479,8 +479,8 @@ TYPED_TEST(DLPackNumericTests, FromDlpackCpu)
   thrust::host_vector<T> host_vector(data.begin(), data.end());
   tensor.dl_tensor.data = host_vector.data();
 
-  fixed_width_column_wrapper<TypeParam> col1({1, 2, 3, 4});
-  fixed_width_column_wrapper<TypeParam> col2({5, 6, 7, 8});
+  cudf::test::fixed_width_column_wrapper<TypeParam> col1({1, 2, 3, 4});
+  cudf::test::fixed_width_column_wrapper<TypeParam> col2({5, 6, 7, 8});
   cudf::table_view expected({col1, col2});
 
   auto result = cudf::from_dlpack(&tensor);
diff --git a/cpp/tests/lists/explode_tests.cpp b/cpp/tests/lists/explode_tests.cpp
index fd22932916f..1a20a88df96 100644
--- a/cpp/tests/lists/explode_tests.cpp
+++ b/cpp/tests/lists/explode_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -22,9 +22,8 @@
 #include <cudf/detail/iterator.cuh>
 #include <cudf/lists/explode.hpp>
 
-using namespace cudf::test;
-using FCW = fixed_width_column_wrapper<int32_t>;
-using LCW = lists_column_wrapper<int32_t>;
+using FCW = cudf::test::fixed_width_column_wrapper<int32_t>;
+using LCW = cudf::test::lists_column_wrapper<int32_t>;
 
 class ExplodeTest : public cudf::test::BaseFixture {
 };
@@ -78,11 +77,11 @@ TEST_F(ExplodeTest, Basics)
 
   FCW a{100, 200, 300};
   LCW b{LCW{1, 2, 7}, LCW{5, 6}, LCW{0, 3}};
-  strings_column_wrapper c{"string0", "string1", "string2"};
+  cudf::test::strings_column_wrapper c{"string0", "string1", "string2"};
 
   FCW expected_a{100, 100, 100, 200, 200, 300, 300};
   FCW expected_b{1, 2, 7, 5, 6, 0, 3};
-  strings_column_wrapper expected_c{
+  cudf::test::strings_column_wrapper expected_c{
     "string0", "string0", "string0", "string1", "string1", "string2", "string2"};
 
   cudf::table_view t({a, b, c});
@@ -347,14 +346,14 @@ TEST_F(ExplodeTest, NestedStructs)
          LCW{LCW{5, 6}},
          LCW{LCW{0, 3}, LCW{5}, LCW({2, null}, valids)}});
   FCW b1({100, 200, 300});
-  strings_column_wrapper b2{"100", "200", "300"};
-  structs_column_wrapper b({b1, b2});
+  cudf::test::strings_column_wrapper b2{"100", "200", "300"};
+  cudf::test::structs_column_wrapper b({b1, b2});
 
   LCW expected_a{
     LCW({1, null}, valids), LCW{7, 6, 5}, LCW{5, 6}, LCW{0, 3}, LCW{5}, LCW({2, null}, valids)};
   FCW expected_b1{100, 100, 200, 300, 300, 300};
-  strings_column_wrapper expected_b2{"100", "100", "200", "300", "300", "300"};
-  structs_column_wrapper expected_b({expected_b1, expected_b2});
+  cudf::test::strings_column_wrapper expected_b2{"100", "100", "200", "300", "300", "300"};
+  cudf::test::structs_column_wrapper expected_b({expected_b1, expected_b2});
 
   cudf::table_view t({a, b});
   cudf::table_view expected({expected_a, expected_b});
@@ -435,15 +434,16 @@ TEST_F(ExplodeTest, ListOfStructsWithEmpties)
   // concatenated
   auto final_col =
     cudf::concatenate(std::vector<cudf::column_view>({*row0, *row1, *row2, *row3, *row4}));
-  auto s = strings_column_wrapper({"a", "b", "c", "d", "e"}).release();
+  auto s = cudf::test::strings_column_wrapper({"a", "b", "c", "d", "e"}).release();
 
   cudf::table_view t({final_col->view(), s->view()});
 
-  auto ret                  = cudf::explode(t, 0);
-  auto expected_numeric_col = fixed_width_column_wrapper<int32_t>{{1, null, null}, {1, 0, 0}};
+  auto ret = cudf::explode(t, 0);
+  auto expected_numeric_col =
+    cudf::test::fixed_width_column_wrapper<int32_t>{{1, null, null}, {1, 0, 0}};
 
-  auto expected_a = structs_column_wrapper{{expected_numeric_col}, {1, 1, 0}}.release();
-  auto expected_b = strings_column_wrapper({"a", "b", "c"}).release();
+  auto expected_a = cudf::test::structs_column_wrapper{{expected_numeric_col}, {1, 1, 0}}.release();
+  auto expected_b = cudf::test::strings_column_wrapper({"a", "b", "c"}).release();
 
   cudf::table_view expected({expected_a->view(), expected_b->view()});
 
@@ -464,10 +464,11 @@ TYPED_TEST(ExplodeTypedTest, ListOfStructs)
   //  [{25, "25"}, {30, "30"}] 400
   //  [{15, "15"}, {20, "20"}] 500
 
-  auto numeric_col =
-    fixed_width_column_wrapper<TypeParam, int32_t>{{70, 75, 50, 55, 35, 45, 25, 30, 15, 20}};
-  strings_column_wrapper string_col{"70", "75", "50", "55", "35", "45", "25", "30", "15", "20"};
-  auto struct_col = structs_column_wrapper{{numeric_col, string_col}}.release();
+  auto numeric_col = cudf::test::fixed_width_column_wrapper<TypeParam, int32_t>{
+    {70, 75, 50, 55, 35, 45, 25, 30, 15, 20}};
+  cudf::test::strings_column_wrapper string_col{
+    "70", "75", "50", "55", "35", "45", "25", "30", "15", "20"};
+  auto struct_col = cudf::test::structs_column_wrapper{{numeric_col, string_col}}.release();
   auto a          = cudf::make_lists_column(
     5, FCW{0, 2, 4, 6, 8, 10}.release(), std::move(struct_col), cudf::UNKNOWN_NULL_COUNT, {});
 
@@ -476,12 +477,13 @@ TYPED_TEST(ExplodeTypedTest, ListOfStructs)
   cudf::table_view t({a->view(), b});
   auto ret = cudf::explode(t, 0);
 
-  auto expected_numeric_col =
-    fixed_width_column_wrapper<TypeParam, int32_t>{{70, 75, 50, 55, 35, 45, 25, 30, 15, 20}};
-  strings_column_wrapper expected_string_col{
+  auto expected_numeric_col = cudf::test::fixed_width_column_wrapper<TypeParam, int32_t>{
+    {70, 75, 50, 55, 35, 45, 25, 30, 15, 20}};
+  cudf::test::strings_column_wrapper expected_string_col{
     "70", "75", "50", "55", "35", "45", "25", "30", "15", "20"};
 
-  auto expected_a = structs_column_wrapper{{expected_numeric_col, expected_string_col}}.release();
+  auto expected_a =
+    cudf::test::structs_column_wrapper{{expected_numeric_col, expected_string_col}}.release();
   FCW expected_b{100, 100, 200, 200, 300, 300, 400, 400, 500, 500};
 
   cudf::table_view expected({expected_a->view(), expected_b});
@@ -570,11 +572,11 @@ TEST_F(ExplodeOuterTest, Basics)
 
   FCW a{100, 200, 300};
   LCW b{LCW{1, 2, 7}, LCW{5, 6}, LCW{0, 3}};
-  strings_column_wrapper c{"string0", "string1", "string2"};
+  cudf::test::strings_column_wrapper c{"string0", "string1", "string2"};
 
   FCW expected_a{100, 100, 100, 200, 200, 300, 300};
   FCW expected_b{1, 2, 7, 5, 6, 0, 3};
-  strings_column_wrapper expected_c{
+  cudf::test::strings_column_wrapper expected_c{
     "string0", "string0", "string0", "string1", "string1", "string2", "string2"};
 
   cudf::table_view t({a, b, c});
@@ -992,14 +994,14 @@ TEST_F(ExplodeOuterTest, NestedStructs)
          LCW{LCW{5, 6}},
          LCW{LCW{0, 3}, LCW{5}, LCW({2, null}, valids)}});
   FCW b1({100, 200, 300});
-  strings_column_wrapper b2{"100", "200", "300"};
-  structs_column_wrapper b({b1, b2});
+  cudf::test::strings_column_wrapper b2{"100", "200", "300"};
+  cudf::test::structs_column_wrapper b({b1, b2});
 
   LCW expected_a{
     LCW({1, null}, valids), LCW{7, 6, 5}, LCW{5, 6}, LCW{0, 3}, LCW{5}, LCW({2, null}, valids)};
   FCW expected_b1{100, 100, 200, 300, 300, 300};
-  strings_column_wrapper expected_b2{"100", "100", "200", "300", "300", "300"};
-  structs_column_wrapper expected_b({expected_b1, expected_b2});
+  cudf::test::strings_column_wrapper expected_b2{"100", "100", "200", "300", "300", "300"};
+  cudf::test::structs_column_wrapper expected_b({expected_b1, expected_b2});
 
   cudf::table_view t({a, b});
   cudf::table_view expected({expected_a, expected_b});
@@ -1080,17 +1082,18 @@ TEST_F(ExplodeOuterTest, ListOfStructsWithEmpties)
   // concatenated
   auto final_col =
     cudf::concatenate(std::vector<cudf::column_view>({*row0, *row1, *row2, *row3, *row4}));
-  auto s = strings_column_wrapper({"a", "b", "c", "d", "e"}).release();
+  auto s = cudf::test::strings_column_wrapper({"a", "b", "c", "d", "e"}).release();
 
   cudf::table_view t({final_col->view(), s->view()});
 
   auto ret = cudf::explode_outer(t, 0);
 
   auto expected_numeric_col =
-    fixed_width_column_wrapper<int32_t>{{1, null, null, null, null}, {1, 0, 0, 0, 0}};
+    cudf::test::fixed_width_column_wrapper<int32_t>{{1, null, null, null, null}, {1, 0, 0, 0, 0}};
 
-  auto expected_a = structs_column_wrapper{{expected_numeric_col}, {1, 1, 0, 0, 0}}.release();
-  auto expected_b = strings_column_wrapper({"a", "b", "c", "d", "e"}).release();
+  auto expected_a =
+    cudf::test::structs_column_wrapper{{expected_numeric_col}, {1, 1, 0, 0, 0}}.release();
+  auto expected_b = cudf::test::strings_column_wrapper({"a", "b", "c", "d", "e"}).release();
 
   cudf::table_view expected({expected_a->view(), expected_b->view()});
 
@@ -1111,10 +1114,11 @@ TYPED_TEST(ExplodeOuterTypedTest, ListOfStructs)
   //  [{25, "25"}, {30, "30"}] 400
   //  [{15, "15"}, {20, "20"}] 500
 
-  auto numeric_col =
-    fixed_width_column_wrapper<TypeParam, int32_t>{{70, 75, 50, 55, 35, 45, 25, 30, 15, 20}};
-  strings_column_wrapper string_col{"70", "75", "50", "55", "35", "45", "25", "30", "15", "20"};
-  auto struct_col = structs_column_wrapper{{numeric_col, string_col}}.release();
+  auto numeric_col = cudf::test::fixed_width_column_wrapper<TypeParam, int32_t>{
+    {70, 75, 50, 55, 35, 45, 25, 30, 15, 20}};
+  cudf::test::strings_column_wrapper string_col{
+    "70", "75", "50", "55", "35", "45", "25", "30", "15", "20"};
+  auto struct_col = cudf::test::structs_column_wrapper{{numeric_col, string_col}}.release();
   auto a          = cudf::make_lists_column(
     5, FCW{0, 2, 4, 6, 8, 10}.release(), std::move(struct_col), cudf::UNKNOWN_NULL_COUNT, {});
 
@@ -1123,12 +1127,13 @@ TYPED_TEST(ExplodeOuterTypedTest, ListOfStructs)
   cudf::table_view t({a->view(), b});
   auto ret = cudf::explode_outer(t, 0);
 
-  auto expected_numeric_col =
-    fixed_width_column_wrapper<TypeParam, int32_t>{{70, 75, 50, 55, 35, 45, 25, 30, 15, 20}};
-  strings_column_wrapper expected_string_col{
+  auto expected_numeric_col = cudf::test::fixed_width_column_wrapper<TypeParam, int32_t>{
+    {70, 75, 50, 55, 35, 45, 25, 30, 15, 20}};
+  cudf::test::strings_column_wrapper expected_string_col{
     "70", "75", "50", "55", "35", "45", "25", "30", "15", "20"};
 
-  auto expected_a = structs_column_wrapper{{expected_numeric_col, expected_string_col}}.release();
+  auto expected_a =
+    cudf::test::structs_column_wrapper{{expected_numeric_col, expected_string_col}}.release();
   FCW expected_b{100, 100, 200, 200, 300, 300, 400, 400, 500, 500};
 
   cudf::table_view expected({expected_a->view(), expected_b});
diff --git a/cpp/tests/reshape/byte_cast_tests.cpp b/cpp/tests/reshape/byte_cast_tests.cpp
index f29b3a7980f..6eafc9a2759 100644
--- a/cpp/tests/reshape/byte_cast_tests.cpp
+++ b/cpp/tests/reshape/byte_cast_tests.cpp
@@ -21,21 +21,20 @@
 #include <cudf/detail/iterator.cuh>
 #include <cudf/reshape.hpp>
 
-using namespace cudf::test;
-
 class ByteCastTest : public cudf::test::BaseFixture {
 };
 
 TEST_F(ByteCastTest, int16ValuesWithSplit)
 {
   using limits = std::numeric_limits<int16_t>;
-  fixed_width_column_wrapper<int16_t> const int16_col(
+  cudf::test::fixed_width_column_wrapper<int16_t> const int16_col(
     {short(0), short(100), short(-100), limits::min(), limits::max()});
-  lists_column_wrapper<uint8_t> const int16_expected(
+  cudf::test::lists_column_wrapper<uint8_t> const int16_expected(
     {{0x00, 0x00}, {0x64, 0x00}, {0x9c, 0xff}, {0x00, 0x80}, {0xff, 0x7f}});
-  lists_column_wrapper<uint8_t> const int16_expected_slice1(
+  cudf::test::lists_column_wrapper<uint8_t> const int16_expected_slice1(
     {{0x00, 0x00}, {0x00, 0x64}, {0xff, 0x9c}});
-  lists_column_wrapper<uint8_t> const int16_expected_slice2({{0x80, 0x00}, {0x7f, 0xff}});
+  cudf::test::lists_column_wrapper<uint8_t> const int16_expected_slice2(
+    {{0x80, 0x00}, {0x7f, 0xff}});
 
   std::vector<cudf::size_type> splits({3});
   std::vector<cudf::column_view> split_column = cudf::split(int16_col, splits);
@@ -54,23 +53,23 @@ TEST_F(ByteCastTest, int16ValuesWithNulls)
   auto odd_validity =
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; });
 
-  fixed_width_column_wrapper<int16_t> const int16_col(
+  cudf::test::fixed_width_column_wrapper<int16_t> const int16_col(
     {short(0), short(100), short(-100), limits::min(), limits::max()}, {0, 1, 0, 1, 0});
   /* CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT compares underlying values even when specified as null,
    * resulting in erroneous test failures. The commented out data tests the case where underlying
    * values are different, but are both null. */
   // auto int16_data =
-  //   fixed_width_column_wrapper<uint8_t>{0xee, 0xff, 0x00, 0x64, 0xee, 0xff, 0x80, 0x00, 0xee,
-  //   0xff};
-  auto int16_data =
-    fixed_width_column_wrapper<uint8_t>{0x00, 0x00, 0x00, 0x64, 0xff, 0x9c, 0x80, 0x00, 0x7f, 0xff};
+  //   cudf::test::fixed_width_column_wrapper<uint8_t>{0xee, 0xff, 0x00, 0x64, 0xee, 0xff, 0x80,
+  //   0x00, 0xee, 0xff};
+  auto int16_data = cudf::test::fixed_width_column_wrapper<uint8_t>{
+    0x00, 0x00, 0x00, 0x64, 0xff, 0x9c, 0x80, 0x00, 0x7f, 0xff};
 
   auto int16_expected = cudf::make_lists_column(
     5,
-    std::move(fixed_width_column_wrapper<cudf::size_type>{0, 2, 4, 6, 8, 10}.release()),
+    std::move(cudf::test::fixed_width_column_wrapper<cudf::size_type>{0, 2, 4, 6, 8, 10}.release()),
     std::move(int16_data.release()),
     3,
-    detail::make_null_mask(odd_validity, odd_validity + 5));
+    cudf::test::detail::make_null_mask(odd_validity, odd_validity + 5));
 
   auto const output_int16 = cudf::byte_cast(int16_col, cudf::flip_endianness::YES);
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(output_int16->view(), int16_expected->view());
@@ -79,17 +78,19 @@ TEST_F(ByteCastTest, int16ValuesWithNulls)
 TEST_F(ByteCastTest, int32Values)
 {
   using limits = std::numeric_limits<int32_t>;
-  fixed_width_column_wrapper<int32_t> const int32_col({0, 100, -100, limits::min(), limits::max()});
-  lists_column_wrapper<uint8_t> const int32_expected_flipped({{0x00, 0x00, 0x00, 0x00},
-                                                              {0x00, 0x00, 0x00, 0x64},
-                                                              {0xff, 0xff, 0xff, 0x9c},
-                                                              {0x80, 0x00, 0x00, 0x00},
-                                                              {0x7f, 0xff, 0xff, 0xff}});
-  lists_column_wrapper<uint8_t> const int32_expected({{0x00, 0x00, 0x00, 0x00},
-                                                      {0x64, 0x00, 0x00, 0x00},
-                                                      {0x9c, 0xff, 0xff, 0xff},
-                                                      {0x00, 0x00, 0x00, 0x80},
-                                                      {0xff, 0xff, 0xff, 0x7f}});
+  cudf::test::fixed_width_column_wrapper<int32_t> const int32_col(
+    {0, 100, -100, limits::min(), limits::max()});
+  cudf::test::lists_column_wrapper<uint8_t> const int32_expected_flipped(
+    {{0x00, 0x00, 0x00, 0x00},
+     {0x00, 0x00, 0x00, 0x64},
+     {0xff, 0xff, 0xff, 0x9c},
+     {0x80, 0x00, 0x00, 0x00},
+     {0x7f, 0xff, 0xff, 0xff}});
+  cudf::test::lists_column_wrapper<uint8_t> const int32_expected({{0x00, 0x00, 0x00, 0x00},
+                                                                  {0x64, 0x00, 0x00, 0x00},
+                                                                  {0x9c, 0xff, 0xff, 0xff},
+                                                                  {0x00, 0x00, 0x00, 0x80},
+                                                                  {0xff, 0xff, 0xff, 0x7f}});
 
   auto const output_int32_flipped = cudf::byte_cast(int32_col, cudf::flip_endianness::YES);
   auto const output_int32         = cudf::byte_cast(int32_col, cudf::flip_endianness::NO);
@@ -103,23 +104,24 @@ TEST_F(ByteCastTest, int32ValuesWithNulls)
   auto even_validity =
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i + 1) % 2; });
 
-  fixed_width_column_wrapper<int32_t> const int32_col({0, 100, -100, limits::min(), limits::max()},
-                                                      {1, 0, 1, 0, 1});
+  cudf::test::fixed_width_column_wrapper<int32_t> const int32_col(
+    {0, 100, -100, limits::min(), limits::max()}, {1, 0, 1, 0, 1});
   /* Data commented out below explained by comment in int16ValuesWithNulls test */
   // auto int32_data =
-  //   fixed_width_column_wrapper<uint8_t>{0x00, 0x00, 0x00, 0x00, 0xcc, 0xdd, 0xee, 0xff, 0xff,
-  //   0xff,
+  //   cudf::test::fixed_width_column_wrapper<uint8_t>{0x00, 0x00, 0x00, 0x00, 0xcc, 0xdd, 0xee,
+  //   0xff, 0xff, 0xff,
   //                                       0xff, 0x9c, 0xcc, 0xdd, 0xee, 0xff, 0x7f, 0xff, 0xff,
   //                                       0xff};
-  auto int32_data =
-    fixed_width_column_wrapper<uint8_t>{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x64, 0xff, 0xff,
-                                        0xff, 0x9c, 0x80, 0x00, 0x00, 0x00, 0x7f, 0xff, 0xff, 0xff};
+  auto int32_data = cudf::test::fixed_width_column_wrapper<uint8_t>{
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x64, 0xff, 0xff,
+    0xff, 0x9c, 0x80, 0x00, 0x00, 0x00, 0x7f, 0xff, 0xff, 0xff};
   auto int32_expected = cudf::make_lists_column(
     5,
-    std::move(fixed_width_column_wrapper<cudf::size_type>{0, 4, 8, 12, 16, 20}.release()),
+    std::move(
+      cudf::test::fixed_width_column_wrapper<cudf::size_type>{0, 4, 8, 12, 16, 20}.release()),
     std::move(int32_data.release()),
     2,
-    detail::make_null_mask(even_validity, even_validity + 5));
+    cudf::test::detail::make_null_mask(even_validity, even_validity + 5));
 
   auto const output_int32 = cudf::byte_cast(int32_col, cudf::flip_endianness::YES);
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(output_int32->view(), int32_expected->view());
@@ -128,19 +130,19 @@ TEST_F(ByteCastTest, int32ValuesWithNulls)
 TEST_F(ByteCastTest, int64ValuesWithSplit)
 {
   using limits = std::numeric_limits<int64_t>;
-  fixed_width_column_wrapper<int64_t> const int64_col(
+  cudf::test::fixed_width_column_wrapper<int64_t> const int64_col(
     {long(0), long(100), long(-100), limits::min(), limits::max()});
-  lists_column_wrapper<uint8_t> const int64_expected_flipped(
+  cudf::test::lists_column_wrapper<uint8_t> const int64_expected_flipped(
     {{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
      {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x64},
      {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x9c},
      {0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
      {0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}});
-  lists_column_wrapper<uint8_t> const int64_expected_slice1(
+  cudf::test::lists_column_wrapper<uint8_t> const int64_expected_slice1(
     {{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
      {0x64, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
      {0x9c, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}});
-  lists_column_wrapper<uint8_t> const int64_expected_slice2(
+  cudf::test::lists_column_wrapper<uint8_t> const int64_expected_slice2(
     {{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80},
      {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f}});
 
@@ -161,23 +163,24 @@ TEST_F(ByteCastTest, int64ValuesWithNulls)
   auto odd_validity =
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; });
 
-  fixed_width_column_wrapper<int64_t> const int64_col(
+  cudf::test::fixed_width_column_wrapper<int64_t> const int64_col(
     {long(0), long(100), long(-100), limits::min(), limits::max()}, {0, 1, 0, 1, 0});
   /* Data commented out below explained by comment in int16ValuesWithNulls test */
-  // auto int64_data = fixed_width_column_wrapper<uint8_t>{
+  // auto int64_data = cudf::test::fixed_width_column_wrapper<uint8_t>{
   //   0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   //   0x00, 0x64, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, 0x80, 0x00, 0x00, 0x00,
   //   0x00, 0x00, 0x00, 0x00, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff};
-  auto int64_data = fixed_width_column_wrapper<uint8_t>{
+  auto int64_data = cudf::test::fixed_width_column_wrapper<uint8_t>{
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x64, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x9c, 0x80, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
   auto int64_expected = cudf::make_lists_column(
     5,
-    std::move(fixed_width_column_wrapper<cudf::size_type>{0, 8, 16, 24, 32, 40}.release()),
+    std::move(
+      cudf::test::fixed_width_column_wrapper<cudf::size_type>{0, 8, 16, 24, 32, 40}.release()),
     std::move(int64_data.release()),
     3,
-    detail::make_null_mask(odd_validity, odd_validity + 5));
+    cudf::test::detail::make_null_mask(odd_validity, odd_validity + 5));
 
   auto const output_int64 = cudf::byte_cast(int64_col, cudf::flip_endianness::YES);
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(output_int64->view(), int64_expected->view());
@@ -188,26 +191,26 @@ TEST_F(ByteCastTest, fp32ValuesWithSplit)
   using limits = std::numeric_limits<float>;
   float nan    = limits::quiet_NaN();
   float inf    = limits::infinity();
-  fixed_width_column_wrapper<float> const fp32_col(
+  cudf::test::fixed_width_column_wrapper<float> const fp32_col(
     {float(0.0), float(100.0), float(-100.0), limits::min(), limits::max(), nan, -nan, inf, -inf});
-  lists_column_wrapper<uint8_t> const fp32_expected({{0x00, 0x00, 0x00, 0x00},
-                                                     {0x00, 0x00, 0xc8, 0x42},
-                                                     {0x00, 0x00, 0xc8, 0xc2},
-                                                     {0x00, 0x00, 0x80, 0x00},
-                                                     {0xff, 0xff, 0x7f, 0x7f},
-                                                     {0x00, 0x00, 0xc0, 0x7f},
-                                                     {0x00, 0x00, 0xc0, 0xff},
-                                                     {0x00, 0x00, 0x80, 0x7f},
-                                                     {0x00, 0x00, 0x80, 0xff}});
-  lists_column_wrapper<uint8_t> const fp32_expected_slice1({{0x00, 0x00, 0x00, 0x00},
-                                                            {0x42, 0xc8, 0x00, 0x00},
-                                                            {0xc2, 0xc8, 0x00, 0x00},
-                                                            {0x00, 0x80, 0x00, 0x00},
-                                                            {0x7f, 0x7f, 0xff, 0xff}});
-  lists_column_wrapper<uint8_t> const fp32_expected_slice2({{0x7f, 0xc0, 0x00, 0x00},
-                                                            {0xff, 0xc0, 0x00, 0x00},
-                                                            {0x7f, 0x80, 0x00, 0x00},
-                                                            {0xff, 0x80, 0x00, 0x00}});
+  cudf::test::lists_column_wrapper<uint8_t> const fp32_expected({{0x00, 0x00, 0x00, 0x00},
+                                                                 {0x00, 0x00, 0xc8, 0x42},
+                                                                 {0x00, 0x00, 0xc8, 0xc2},
+                                                                 {0x00, 0x00, 0x80, 0x00},
+                                                                 {0xff, 0xff, 0x7f, 0x7f},
+                                                                 {0x00, 0x00, 0xc0, 0x7f},
+                                                                 {0x00, 0x00, 0xc0, 0xff},
+                                                                 {0x00, 0x00, 0x80, 0x7f},
+                                                                 {0x00, 0x00, 0x80, 0xff}});
+  cudf::test::lists_column_wrapper<uint8_t> const fp32_expected_slice1({{0x00, 0x00, 0x00, 0x00},
+                                                                        {0x42, 0xc8, 0x00, 0x00},
+                                                                        {0xc2, 0xc8, 0x00, 0x00},
+                                                                        {0x00, 0x80, 0x00, 0x00},
+                                                                        {0x7f, 0x7f, 0xff, 0xff}});
+  cudf::test::lists_column_wrapper<uint8_t> const fp32_expected_slice2({{0x7f, 0xc0, 0x00, 0x00},
+                                                                        {0xff, 0xc0, 0x00, 0x00},
+                                                                        {0x7f, 0x80, 0x00, 0x00},
+                                                                        {0xff, 0x80, 0x00, 0x00}});
 
   std::vector<cudf::size_type> splits({5});
   std::vector<cudf::column_view> split_column = cudf::split(fp32_col, splits);
@@ -226,23 +229,24 @@ TEST_F(ByteCastTest, fp32ValuesWithNulls)
   auto even_validity =
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i + 1) % 2; });
 
-  fixed_width_column_wrapper<float> const fp32_col(
+  cudf::test::fixed_width_column_wrapper<float> const fp32_col(
     {float(0.0), float(100.0), float(-100.0), limits::min(), limits::max()}, {1, 0, 1, 0, 1});
   /* Data commented out below explained by comment in int16ValuesWithNulls test */
   // auto fp32_data =
-  //   fixed_width_column_wrapper<uint8_t>{0x00, 0x00, 0x00, 0x00, 0xcc, 0xdd, 0xee, 0xff, 0xc2,
-  //   0xc8,
+  //   cudf::test::fixed_width_column_wrapper<uint8_t>{0x00, 0x00, 0x00, 0x00, 0xcc, 0xdd, 0xee,
+  //   0xff, 0xc2, 0xc8,
   //                                       0x00, 0x00, 0xcc, 0xdd, 0xee, 0xff, 0x7f, 0x7f, 0xff,
   //                                       0xff};
-  auto fp32_data =
-    fixed_width_column_wrapper<uint8_t>{0x00, 0x00, 0x00, 0x00, 0x42, 0xc8, 0x00, 0x00, 0xc2, 0xc8,
-                                        0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x7f, 0x7f, 0xff, 0xff};
+  auto fp32_data = cudf::test::fixed_width_column_wrapper<uint8_t>{
+    0x00, 0x00, 0x00, 0x00, 0x42, 0xc8, 0x00, 0x00, 0xc2, 0xc8,
+    0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x7f, 0x7f, 0xff, 0xff};
   auto fp32_expected = cudf::make_lists_column(
     5,
-    std::move(fixed_width_column_wrapper<cudf::size_type>{0, 4, 8, 12, 16, 20}.release()),
+    std::move(
+      cudf::test::fixed_width_column_wrapper<cudf::size_type>{0, 4, 8, 12, 16, 20}.release()),
     std::move(fp32_data.release()),
     2,
-    detail::make_null_mask(even_validity, even_validity + 5));
+    cudf::test::detail::make_null_mask(even_validity, even_validity + 5));
 
   auto const output_fp32 = cudf::byte_cast(fp32_col, cudf::flip_endianness::YES);
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(output_fp32->view(), fp32_expected->view());
@@ -253,16 +257,16 @@ TEST_F(ByteCastTest, fp64ValuesWithSplit)
   using limits = std::numeric_limits<double>;
   double nan   = limits::quiet_NaN();
   double inf   = limits::infinity();
-  fixed_width_column_wrapper<double> const fp64_col({double(0.0),
-                                                     double(100.0),
-                                                     double(-100.0),
-                                                     limits::min(),
-                                                     limits::max(),
-                                                     nan,
-                                                     -nan,
-                                                     inf,
-                                                     -inf});
-  lists_column_wrapper<uint8_t> const fp64_flipped_expected(
+  cudf::test::fixed_width_column_wrapper<double> const fp64_col({double(0.0),
+                                                                 double(100.0),
+                                                                 double(-100.0),
+                                                                 limits::min(),
+                                                                 limits::max(),
+                                                                 nan,
+                                                                 -nan,
+                                                                 inf,
+                                                                 -inf});
+  cudf::test::lists_column_wrapper<uint8_t> const fp64_flipped_expected(
     {{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
      {0x40, 0x59, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
      {0xc0, 0x59, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
@@ -272,13 +276,13 @@ TEST_F(ByteCastTest, fp64ValuesWithSplit)
      {0xff, 0xf8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
      {0x7f, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
      {0xff, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}});
-  lists_column_wrapper<uint8_t> const fp64_expected_slice1(
+  cudf::test::lists_column_wrapper<uint8_t> const fp64_expected_slice1(
     {{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
      {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x59, 0x40},
      {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x59, 0xc0},
      {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00},
      {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xef, 0x7f}});
-  lists_column_wrapper<uint8_t> const fp64_expected_slice2(
+  cudf::test::lists_column_wrapper<uint8_t> const fp64_expected_slice2(
     {{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0x7f},
      {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf8, 0xff},
      {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf0, 0x7f},
@@ -301,23 +305,24 @@ TEST_F(ByteCastTest, fp64ValuesWithNulls)
   auto odd_validity =
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2; });
 
-  fixed_width_column_wrapper<double> const fp64_col(
+  cudf::test::fixed_width_column_wrapper<double> const fp64_col(
     {double(0.0), double(100.0), double(-100.0), limits::min(), limits::max()}, {0, 1, 0, 1, 0});
   /* Data commented out below explained by comment in int16ValuesWithNulls test */
-  // auto fp64_data = fixed_width_column_wrapper<uint8_t>{
+  // auto fp64_data = cudf::test::fixed_width_column_wrapper<uint8_t>{
   //   0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, 0x40, 0x59, 0x00, 0x00, 0x00, 0x00,
   //   0x00, 0x00, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff, 0x00, 0x10, 0x00, 0x00,
   //   0x00, 0x00, 0x00, 0x00, 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff};
-  auto fp64_data = fixed_width_column_wrapper<uint8_t>{
+  auto fp64_data = cudf::test::fixed_width_column_wrapper<uint8_t>{
     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x59, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0xc0, 0x59, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00,
     0x00, 0x00, 0x00, 0x00, 0x7f, 0xef, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
   auto fp64_expected = cudf::make_lists_column(
     5,
-    std::move(fixed_width_column_wrapper<cudf::size_type>{0, 8, 16, 24, 32, 40}.release()),
+    std::move(
+      cudf::test::fixed_width_column_wrapper<cudf::size_type>{0, 8, 16, 24, 32, 40}.release()),
     std::move(fp64_data.release()),
     3,
-    detail::make_null_mask(odd_validity, odd_validity + 5));
+    cudf::test::detail::make_null_mask(odd_validity, odd_validity + 5));
 
   auto const output_fp64 = cudf::byte_cast(fp64_col, cudf::flip_endianness::YES);
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(output_fp64->view(), fp64_expected->view());
@@ -325,9 +330,9 @@ TEST_F(ByteCastTest, fp64ValuesWithNulls)
 
 TEST_F(ByteCastTest, StringValues)
 {
-  strings_column_wrapper const strings_col(
+  cudf::test::strings_column_wrapper const strings_col(
     {"", "The quick", " brown fox...", "!\"#$%&\'()*+,-./", "0123456789:;<=>?@", "[\\]^_`{|}~"});
-  lists_column_wrapper<uint8_t> const strings_expected(
+  cudf::test::lists_column_wrapper<uint8_t> const strings_expected(
     {{},
      {0x54, 0x68, 0x65, 0x20, 0x71, 0x75, 0x69, 0x63, 0x6b},
      {0x20, 0x62, 0x72, 0x6f, 0x77, 0x6e, 0x20, 0x66, 0x6f, 0x78, 0x2e, 0x2e, 0x2e},
diff --git a/cpp/tests/reshape/tile_tests.cpp b/cpp/tests/reshape/tile_tests.cpp
index e605fd7a84b..86dcc431633 100644
--- a/cpp/tests/reshape/tile_tests.cpp
+++ b/cpp/tests/reshape/tile_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -24,10 +24,8 @@
 #include <cudf/table/table.hpp>
 #include <cudf/utilities/error.hpp>
 
-using namespace cudf::test;
-
 template <typename T>
-struct TileTest : public BaseFixture {
+struct TileTest : public cudf::test::BaseFixture {
 };
 
 TYPED_TEST_SUITE(TileTest, cudf::test::AllTypes);
@@ -47,7 +45,7 @@ TYPED_TEST(TileTest, NoRows)
 {
   using T = TypeParam;
 
-  fixed_width_column_wrapper<T> in_a({});
+  cudf::test::fixed_width_column_wrapper<T> in_a({});
   cudf::table_view in(std::vector<cudf::column_view>{in_a});
 
   auto expected = in;
@@ -61,10 +59,10 @@ TYPED_TEST(TileTest, OneColumn)
 {
   using T = TypeParam;
 
-  fixed_width_column_wrapper<T, int32_t> in_a({-1, 0, 1});
+  cudf::test::fixed_width_column_wrapper<T, int32_t> in_a({-1, 0, 1});
   cudf::table_view in(std::vector<cudf::column_view>{in_a});
 
-  fixed_width_column_wrapper<T, int32_t> expected_a({-1, 0, 1, -1, 0, 1});
+  cudf::test::fixed_width_column_wrapper<T, int32_t> expected_a({-1, 0, 1, -1, 0, 1});
   cudf::table_view expected(std::vector<cudf::column_view>{expected_a});
 
   auto actual = cudf::tile(in, 2);
@@ -76,10 +74,11 @@ TYPED_TEST(TileTest, OneColumnNullable)
 {
   using T = TypeParam;
 
-  fixed_width_column_wrapper<T, int32_t> in_a({-1, 0, 1}, {1, 0, 0});
+  cudf::test::fixed_width_column_wrapper<T, int32_t> in_a({-1, 0, 1}, {1, 0, 0});
   cudf::table_view in(std::vector<cudf::column_view>{in_a});
 
-  fixed_width_column_wrapper<T, int32_t> expected_a({-1, 0, 1, -1, 0, 1}, {1, 0, 0, 1, 0, 0});
+  cudf::test::fixed_width_column_wrapper<T, int32_t> expected_a({-1, 0, 1, -1, 0, 1},
+                                                                {1, 0, 0, 1, 0, 0});
   cudf::table_view expected(std::vector<cudf::column_view>{expected_a});
 
   auto actual = cudf::tile(in, 2);
@@ -91,7 +90,7 @@ TYPED_TEST(TileTest, OneColumnNegativeCount)
 {
   using T = TypeParam;
 
-  fixed_width_column_wrapper<T, int32_t> in_a({-1, 0, 1}, {1, 0, 0});
+  cudf::test::fixed_width_column_wrapper<T, int32_t> in_a({-1, 0, 1}, {1, 0, 0});
   cudf::table_view in(std::vector<cudf::column_view>{in_a});
 
   EXPECT_THROW(cudf::tile(in, -1), cudf::logic_error);
@@ -101,13 +100,13 @@ TYPED_TEST(TileTest, OneColumnZeroCount)
 {
   using T = TypeParam;
 
-  fixed_width_column_wrapper<T, int32_t> in_a({-1, 0, 1}, {1, 0, 0});
+  cudf::test::fixed_width_column_wrapper<T, int32_t> in_a({-1, 0, 1}, {1, 0, 0});
   cudf::table_view in(std::vector<cudf::column_view>{in_a});
 
   std::vector<T> vals{};
   std::vector<bool> mask{};
 
-  fixed_width_column_wrapper<T> expected_a(vals.begin(), vals.end(), mask.begin());
+  cudf::test::fixed_width_column_wrapper<T> expected_a(vals.begin(), vals.end(), mask.begin());
 
   cudf::table_view expected(std::vector<cudf::column_view>{expected_a});
 

From 416d4d5d194bf42ffe7ca0c0090860f3c91f90aa Mon Sep 17 00:00:00 2001
From: "Richard (Rick) Zamora" <rzamora217@gmail.com>
Date: Thu, 20 Oct 2022 08:20:21 -0500
Subject: [PATCH 050/202] Enable backend dispatching for Dask-DataFrame
 creation (#11920)

This PR depends on https://github.com/dask/dask/pull/9475 (**Now Merged**)

After dask#9475, external libraries are now able to implement (and expose) their own `DataFrameBackendEntrypoint` definitions to specify custom creation functions for DataFrame collections. This PR introduces the `CudfBackendEntrypoint` class to create `dask_cudf.DataFrame` collections using the `dask.dataframe` API. By installing `dask_cudf` with this entrypoint definition in place, you get the following behavior in `dask.dataframe`:

```python
import dask.dataframe as dd
import dask

# Tell Dask that you want to create DataFrame collections
# with the "cudf" backend (for supported creation functions).
# This can also be used in a context, or set in a yaml file
dask.config.set({"dataframe.backend": "cudf"})

ddf = dd.from_dict({"a": range(10)}, npartitions=2)
type(ddf)  # dask_cudf.core.DataFrame
```

Note that the code snippet above does not require an explicit import of `cudf` or `dask_cudf`. The following creation functions will support backend dispatching after dask#9475:

- `from_dict`
- `read_parquet`
- `read_json`
- `read_orc`
- `read_csv`
- `read_hdf`

See also: https://github.com/dask/design-docs/pull/1

Authors:
  - Richard (Rick) Zamora (https://github.com/rjzamora)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/11920
---
 python/dask_cudf/dask_cudf/backends.py        | 110 ++++++++++++++++++
 python/dask_cudf/dask_cudf/core.py            |  23 ++--
 .../dask_cudf/dask_cudf/io/tests/test_csv.py  |  16 +++
 .../dask_cudf/dask_cudf/io/tests/test_json.py |  17 +++
 .../dask_cudf/dask_cudf/io/tests/test_orc.py  |  16 ++-
 .../dask_cudf/io/tests/test_parquet.py        |  14 +++
 python/dask_cudf/dask_cudf/tests/test_core.py |  17 +++
 python/dask_cudf/setup.cfg                    |   4 +
 8 files changed, 204 insertions(+), 13 deletions(-)

diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py
index 2584ac47878..f02c75eb3e8 100644
--- a/python/dask_cudf/dask_cudf/backends.py
+++ b/python/dask_cudf/dask_cudf/backends.py
@@ -1,5 +1,6 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
+import warnings
 from collections.abc import Iterator
 
 import cupy as cp
@@ -8,6 +9,8 @@
 import pyarrow as pa
 from pandas.api.types import is_scalar
 
+import dask.dataframe as dd
+from dask import config
 from dask.dataframe.core import get_parallel_type, meta_nonempty
 from dask.dataframe.dispatch import (
     categorical_dtype_dispatch,
@@ -426,3 +429,110 @@ def sizeof_cudf_dataframe(df):
 @_dask_cudf_nvtx_annotate
 def sizeof_cudf_series_index(obj):
     return obj.memory_usage()
+
+
+def _default_backend(func, *args, **kwargs):
+    # Utility to call a dask.dataframe function with
+    # the default ("pandas") backend
+
+    # NOTE: Some `CudfBackendEntrypoint` methods need to
+    # invoke the "pandas"-version of the same method, but
+    # with custom kwargs (e.g. `engine`). In these cases,
+    # an explicit "pandas" config context is needed to
+    # avoid a recursive loop
+    with config.set({"dataframe.backend": "pandas"}):
+        return func(*args, **kwargs)
+
+
+try:
+
+    # Define "cudf" backend engine to be registered with Dask
+    from dask.dataframe.backends import DataFrameBackendEntrypoint
+
+    class CudfBackendEntrypoint(DataFrameBackendEntrypoint):
+        """Backend-entrypoint class for Dask-DataFrame
+
+        This class is registered under the name "cudf" for the
+        ``dask.dataframe.backends`` entrypoint in ``setup.cfg``.
+        Dask-DataFrame will use the methods defined in this class
+        in place of ``dask.dataframe.<creation-method>`` when the
+        "dataframe.backend" configuration is set to "cudf":
+
+        Examples
+        --------
+        >>> import dask
+        >>> import dask.dataframe as dd
+        >>> with dask.config.set({"dataframe.backend": "cudf"}):
+        ...     ddf = dd.from_dict({"a": range(10)})
+        >>> type(ddf)
+        <class 'dask_cudf.core.DataFrame'>
+        """
+
+        @staticmethod
+        def from_dict(data, npartitions, orient="columns", **kwargs):
+            from dask_cudf import from_cudf
+
+            if orient != "columns":
+                raise ValueError(f"orient={orient} is not supported")
+            # TODO: Use cudf.from_dict
+            # (See: https://github.com/rapidsai/cudf/issues/11934)
+            return from_cudf(
+                cudf.DataFrame(data),
+                npartitions=npartitions,
+            )
+
+        @staticmethod
+        def read_parquet(*args, engine=None, **kwargs):
+            from dask_cudf.io.parquet import CudfEngine
+
+            return _default_backend(
+                dd.read_parquet,
+                *args,
+                engine=CudfEngine,
+                **kwargs,
+            )
+
+        @staticmethod
+        def read_json(*args, engine=None, **kwargs):
+            return _default_backend(
+                dd.read_json,
+                *args,
+                engine=cudf.read_json,
+                **kwargs,
+            )
+
+        @staticmethod
+        def read_orc(*args, **kwargs):
+            from dask_cudf.io import read_orc
+
+            return read_orc(*args, **kwargs)
+
+        @staticmethod
+        def read_csv(*args, **kwargs):
+            from dask_cudf.io import read_csv
+
+            chunksize = kwargs.pop("chunksize", None)
+            blocksize = kwargs.pop("blocksize", "default")
+            if chunksize is None and blocksize != "default":
+                chunksize = blocksize
+            return read_csv(
+                *args,
+                chunksize=chunksize,
+                **kwargs,
+            )
+
+        @staticmethod
+        def read_hdf(*args, **kwargs):
+            from dask_cudf import from_dask_dataframe
+
+            # HDF5 reader not yet implemented in cudf
+            warnings.warn(
+                "read_hdf is not yet implemented in cudf/dask_cudf. "
+                "Moving to cudf from pandas. Expect poor performance!"
+            )
+            return from_dask_dataframe(
+                _default_backend(dd.read_hdf, *args, **kwargs)
+            )
+
+except ImportError:
+    pass
diff --git a/python/dask_cudf/dask_cudf/core.py b/python/dask_cudf/dask_cudf/core.py
index 0bf39df313a..76705e7cbf1 100644
--- a/python/dask_cudf/dask_cudf/core.py
+++ b/python/dask_cudf/dask_cudf/core.py
@@ -2,10 +2,10 @@
 
 import math
 import warnings
-from distutils.version import LooseVersion
 
 import numpy as np
 import pandas as pd
+from packaging.version import parse as parse_version
 from tlz import partition_all
 
 import dask
@@ -31,7 +31,11 @@
 from dask_cudf.accessors import ListMethods, StructMethods
 from dask_cudf.sorting import _get_shuffle_type
 
-DASK_VERSION = LooseVersion(dask.__version__)
+DASK_BACKEND_SUPPORT = parse_version(dask.__version__) >= parse_version(
+    "2022.10.0"
+)
+# TODO: Remove DASK_BACKEND_SUPPORT throughout codebase
+# when dask_cudf is pinned to dask>=2022.10.0
 
 
 class _Frame(dd.core._Frame, OperatorMethodMixin):
@@ -736,7 +740,7 @@ def from_dask_dataframe(df):
     return df.map_partitions(cudf.from_pandas)
 
 
-for name in [
+for name in (
     "add",
     "sub",
     "mul",
@@ -751,16 +755,13 @@ def from_dask_dataframe(df):
     "rfloordiv",
     "rmod",
     "rpow",
-]:
+):
     meth = getattr(cudf.DataFrame, name)
-    kwargs = {"original": cudf.DataFrame} if DASK_VERSION >= "2.11.1" else {}
-    DataFrame._bind_operator_method(name, meth, **kwargs)
+    DataFrame._bind_operator_method(name, meth, original=cudf.DataFrame)
 
     meth = getattr(cudf.Series, name)
-    kwargs = {"original": cudf.Series} if DASK_VERSION >= "2.11.1" else {}
-    Series._bind_operator_method(name, meth, **kwargs)
+    Series._bind_operator_method(name, meth, original=cudf.Series)
 
-for name in ["lt", "gt", "le", "ge", "ne", "eq"]:
+for name in ("lt", "gt", "le", "ge", "ne", "eq"):
     meth = getattr(cudf.Series, name)
-    kwargs = {"original": cudf.Series} if DASK_VERSION >= "2.11.1" else {}
-    Series._bind_comparison_method(name, meth, **kwargs)
+    Series._bind_comparison_method(name, meth, original=cudf.Series)
diff --git a/python/dask_cudf/dask_cudf/io/tests/test_csv.py b/python/dask_cudf/dask_cudf/io/tests/test_csv.py
index 564a719fb86..7f69e208b5a 100644
--- a/python/dask_cudf/dask_cudf/io/tests/test_csv.py
+++ b/python/dask_cudf/dask_cudf/io/tests/test_csv.py
@@ -16,6 +16,22 @@
 import dask_cudf
 
 
+@pytest.mark.skipif(
+    not dask_cudf.core.DASK_BACKEND_SUPPORT,
+    reason="No backend-dispatch support",
+)
+def test_csv_roundtrip_backend_dispatch(tmp_path):
+    # Test ddf.read_csv cudf-backend dispatch
+    df = cudf.DataFrame({"x": [1, 2, 3, 4], "id": ["a", "b", "c", "d"]})
+    ddf = dask_cudf.from_cudf(df, npartitions=2)
+    csv_path = str(tmp_path / "data-*.csv")
+    ddf.to_csv(csv_path, index=False)
+    with dask.config.set({"dataframe.backend": "cudf"}):
+        ddf2 = dd.read_csv(csv_path)
+    assert isinstance(ddf2, dask_cudf.DataFrame)
+    dd.assert_eq(ddf, ddf2, check_divisions=False, check_index=False)
+
+
 def test_csv_roundtrip(tmp_path):
     df = cudf.DataFrame({"x": [1, 2, 3, 4], "id": ["a", "b", "c", "d"]})
     ddf = dask_cudf.from_cudf(df, npartitions=2)
diff --git a/python/dask_cudf/dask_cudf/io/tests/test_json.py b/python/dask_cudf/dask_cudf/io/tests/test_json.py
index 3f854bb343b..d19f7736e8e 100644
--- a/python/dask_cudf/dask_cudf/io/tests/test_json.py
+++ b/python/dask_cudf/dask_cudf/io/tests/test_json.py
@@ -12,6 +12,23 @@
 import dask_cudf
 
 
+@pytest.mark.skipif(
+    not dask_cudf.core.DASK_BACKEND_SUPPORT,
+    reason="No backend-dispatch support",
+)
+def test_read_json_backend_dispatch(tmp_path):
+    # Test ddf.read_json cudf-backend dispatch
+    df1 = dask.datasets.timeseries(
+        dtypes={"x": int, "y": int}, freq="120s"
+    ).reset_index(drop=True)
+    json_path = str(tmp_path / "data-*.json")
+    df1.to_json(json_path)
+    with dask.config.set({"dataframe.backend": "cudf"}):
+        df2 = dd.read_json(json_path)
+    assert isinstance(df2, dask_cudf.DataFrame)
+    dd.assert_eq(df1, df2)
+
+
 def test_read_json(tmp_path):
     df1 = dask.datasets.timeseries(
         dtypes={"x": int, "y": int}, freq="120s"
diff --git a/python/dask_cudf/dask_cudf/io/tests/test_orc.py b/python/dask_cudf/dask_cudf/io/tests/test_orc.py
index f19396a9b37..2291dfba536 100644
--- a/python/dask_cudf/dask_cudf/io/tests/test_orc.py
+++ b/python/dask_cudf/dask_cudf/io/tests/test_orc.py
@@ -6,18 +6,30 @@
 
 import pytest
 
+import dask
 from dask import dataframe as dd
 
 import cudf
 
 import dask_cudf
 
-# import pyarrow.orc as orc
-
 cur_dir = os.path.dirname(__file__)
 sample_orc = os.path.join(cur_dir, "data/orc/sample.orc")
 
 
+@pytest.mark.skipif(
+    not dask_cudf.core.DASK_BACKEND_SUPPORT,
+    reason="No backend-dispatch support",
+)
+def test_read_orc_backend_dispatch():
+    # Test ddf.read_orc cudf-backend dispatch
+    df1 = cudf.read_orc(sample_orc)
+    with dask.config.set({"dataframe.backend": "cudf"}):
+        df2 = dd.read_orc(sample_orc)
+    assert isinstance(df2, dask_cudf.DataFrame)
+    dd.assert_eq(df1, df2, check_index=False)
+
+
 def test_read_orc_defaults():
     df1 = cudf.read_orc(sample_orc)
     df2 = dask_cudf.read_orc(sample_orc)
diff --git a/python/dask_cudf/dask_cudf/io/tests/test_parquet.py b/python/dask_cudf/dask_cudf/io/tests/test_parquet.py
index ef5741b0539..7b9f926da3f 100644
--- a/python/dask_cudf/dask_cudf/io/tests/test_parquet.py
+++ b/python/dask_cudf/dask_cudf/io/tests/test_parquet.py
@@ -46,6 +46,20 @@ def _divisions(setting):
     return {"gather_statistics": setting}
 
 
+@pytest.mark.skipif(
+    not dask_cudf.core.DASK_BACKEND_SUPPORT,
+    reason="No backend-dispatch support",
+)
+def test_roundtrip_backend_dispatch(tmpdir):
+    # Test ddf.read_parquet cudf-backend dispatch
+    tmpdir = str(tmpdir)
+    ddf.to_parquet(tmpdir, engine="pyarrow")
+    with dask.config.set({"dataframe.backend": "cudf"}):
+        ddf2 = dd.read_parquet(tmpdir, index=False)
+    assert isinstance(ddf2, dask_cudf.DataFrame)
+    dd.assert_eq(ddf.reset_index(drop=False), ddf2)
+
+
 @pytest.mark.parametrize("write_metadata_file", [True, False])
 @pytest.mark.parametrize("divisions", [True, False])
 def test_roundtrip_from_dask(tmpdir, divisions, write_metadata_file):
diff --git a/python/dask_cudf/dask_cudf/tests/test_core.py b/python/dask_cudf/dask_cudf/tests/test_core.py
index 40041fd5c0e..f7c46466705 100644
--- a/python/dask_cudf/dask_cudf/tests/test_core.py
+++ b/python/dask_cudf/dask_cudf/tests/test_core.py
@@ -17,6 +17,23 @@
 import dask_cudf as dgd
 
 
+@pytest.mark.skipif(
+    not dgd.core.DASK_BACKEND_SUPPORT, reason="No backend-dispatch support"
+)
+def test_from_dict_backend_dispatch():
+    # Test ddf.from_dict cudf-backend dispatch
+    np.random.seed(0)
+    data = {
+        "x": np.random.randint(0, 5, size=10000),
+        "y": np.random.normal(size=10000),
+    }
+    expect = cudf.DataFrame(data)
+    with dask.config.set({"dataframe.backend": "cudf"}):
+        ddf = dd.from_dict(data, npartitions=2)
+    assert isinstance(ddf, dgd.DataFrame)
+    dd.assert_eq(expect, ddf)
+
+
 def test_from_cudf():
     np.random.seed(0)
 
diff --git a/python/dask_cudf/setup.cfg b/python/dask_cudf/setup.cfg
index 8f4c2029a87..f45bdf00430 100644
--- a/python/dask_cudf/setup.cfg
+++ b/python/dask_cudf/setup.cfg
@@ -38,3 +38,7 @@ skip=
     buck-out
     build
     dist
+
+[options.entry_points]
+dask.dataframe.backends =
+    cudf = dask_cudf.backends:CudfBackendEntrypoint

From ff41841cee58a2e945d39dfb1d11d823393814ed Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 20 Oct 2022 09:04:36 -0700
Subject: [PATCH 051/202] Remove validation that requires introspection
 (#11938)

This PR removes optional validation for some APIs. Performing these validations requires data introspection, which we do not want. This PR resolves #5505.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Mark Harris (https://github.com/harrism)
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Matthew Roeschke (https://github.com/mroeschke)
  - David Wendt (https://github.com/davidwendt)
  - Nghia Truong (https://github.com/ttnghia)
  - Jason Lowe (https://github.com/jlowe)

URL: https://github.com/rapidsai/cudf/pull/11938
---
 cpp/benchmarks/lists/copying/scatter_lists.cu |  2 +-
 cpp/include/cudf/copying.hpp                  | 18 ++--
 cpp/include/cudf/detail/scatter.cuh           | 11 ---
 cpp/include/cudf/detail/scatter.hpp           | 18 +---
 cpp/include/cudf/filling.hpp                  |  8 +-
 cpp/src/copying/copy.cu                       |  9 +-
 cpp/src/copying/scatter.cu                    | 32 ++-----
 cpp/src/filling/repeat.cu                     | 11 +--
 cpp/src/groupby/sort/scan.cpp                 |  6 +-
 cpp/src/groupby/sort/sort_helper.cu           |  1 -
 cpp/src/partitioning/partitioning.cu          |  4 +-
 cpp/src/rolling/detail/lead_lag_nested.cuh    |  1 -
 .../copying/scatter_list_scalar_tests.cpp     |  4 +-
 .../copying/scatter_struct_scalar_tests.cpp   |  4 +-
 cpp/tests/copying/scatter_tests.cpp           | 87 ++++++-------------
 cpp/tests/filling/repeat_tests.cpp            | 23 +----
 java/src/main/java/ai/rapids/cudf/Table.java  | 38 ++------
 java/src/main/native/src/TableJni.cpp         | 15 ++--
 .../test/java/ai/rapids/cudf/TableTest.java   | 15 +---
 python/cudf/cudf/_lib/copying.pyx             | 22 +++--
 python/cudf/cudf/_lib/cpp/copying.pxd         |  2 -
 python/cudf/cudf/_lib/cpp/filling.pxd         |  3 +-
 python/cudf/cudf/_lib/filling.pyx             |  9 +-
 python/cudf/cudf/core/column/column.py        | 23 ++---
 24 files changed, 99 insertions(+), 267 deletions(-)

diff --git a/cpp/benchmarks/lists/copying/scatter_lists.cu b/cpp/benchmarks/lists/copying/scatter_lists.cu
index 823693721a0..d86fb0578e5 100644
--- a/cpp/benchmarks/lists/copying/scatter_lists.cu
+++ b/cpp/benchmarks/lists/copying/scatter_lists.cu
@@ -108,7 +108,7 @@ void BM_lists_scatter(::benchmark::State& state)
 
   for (auto _ : state) {
     cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
-    scatter(table_view{{*source}}, *scatter_map, table_view{{*target}}, false, mr);
+    scatter(table_view{{*source}}, *scatter_map, table_view{{*target}}, mr);
   }
 
   state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * state.range(0) * 2 *
diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp
index 1c3ca179d17..79dcaaaf00b 100644
--- a/cpp/include/cudf/copying.hpp
+++ b/cpp/include/cudf/copying.hpp
@@ -140,13 +140,12 @@ std::unique_ptr<column> reverse(
  * If the same index appears more than once in the scatter map, the result is
  * undefined.
  *
+ * If any values in `scatter_map` are outside of the interval [-n, n) where `n`
+ * is the number of rows in the `target` table, behavior is undefined.
+ *
  * A negative value `i` in the `scatter_map` is interpreted as `i+n`, where `n`
  * is the number of rows in the `target` table.
  *
- * @throws cudf::logic_error if `check_bounds == true` and an index exists in
- * `scatter_map` outside the range `[-n, n)`, where `n` is the number of rows in
- * the target table. If `check_bounds == false`, the behavior is undefined.
- *
  * @param source The input columns containing values to be scattered into the
  * target columns
  * @param scatter_map A non-nullable column of integral indices that maps the
@@ -154,8 +153,6 @@ std::unique_ptr<column> reverse(
  * to or less than the number of elements in the source columns.
  * @param target The set of columns into which values from the source_table
  * are to be scattered
- * @param check_bounds Optionally perform bounds checking on the values of
- * `scatter_map` and throw an error if any of its values are out of bounds.
  * @param mr Device memory resource used to allocate the returned table's device memory
  * @return Result of scattering values from source to target
  */
@@ -163,7 +160,6 @@ std::unique_ptr<table> scatter(
   table_view const& source,
   column_view const& scatter_map,
   table_view const& target,
-  bool check_bounds                   = false,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -184,9 +180,8 @@ std::unique_ptr<table> scatter(
  * If the same index appears more than once in the scatter map, the result is
  * undefined.
  *
- * @throws cudf::logic_error if `check_bounds == true` and an index exists in
- * `scatter_map` outside the range `[-n, n)`, where `n` is the number of rows in
- * the target table. If `check_bounds == false`, the behavior is undefined.
+ * If any values in `indices` are outside of the interval [-n, n) where `n`
+ * is the number of rows in the `target` table, behavior is undefined.
  *
  * @param source The input scalars containing values to be scattered into the
  * target columns
@@ -194,8 +189,6 @@ std::unique_ptr<table> scatter(
  * the rows in the target table to be replaced by source.
  * @param target The set of columns into which values from the source_table
  * are to be scattered
- * @param check_bounds Optionally perform bounds checking on the values of
- * `scatter_map` and throw an error if any of its values are out of bounds.
  * @param mr Device memory resource used to allocate the returned table's device memory
  * @return Result of scattering values from source to target
  */
@@ -203,7 +196,6 @@ std::unique_ptr<table> scatter(
   std::vector<std::reference_wrapper<const scalar>> const& source,
   column_view const& indices,
   table_view const& target,
-  bool check_bounds                   = false,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh
index 09b16b11a73..413f4c4dae4 100644
--- a/cpp/include/cudf/detail/scatter.cuh
+++ b/cpp/include/cudf/detail/scatter.cuh
@@ -390,7 +390,6 @@ std::unique_ptr<table> scatter(
   MapIterator scatter_map_begin,
   MapIterator scatter_map_end,
   table_view const& target,
-  bool check_bounds                   = false,
   rmm::cuda_stream_view stream        = cudf::default_stream_value,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
@@ -398,16 +397,6 @@ std::unique_ptr<table> scatter(
 
   using MapType = typename thrust::iterator_traits<MapIterator>::value_type;
 
-  if (check_bounds) {
-    auto const begin = -target.num_rows();
-    auto const end   = target.num_rows();
-    auto bounds      = bounds_checker<MapType>{begin, end};
-    CUDF_EXPECTS(
-      std::distance(scatter_map_begin, scatter_map_end) ==
-        thrust::count_if(rmm::exec_policy(stream), scatter_map_begin, scatter_map_end, bounds),
-      "Scatter map index out of bounds");
-  }
-
   CUDF_EXPECTS(std::distance(scatter_map_begin, scatter_map_end) <= source.num_rows(),
                "scatter map size should be <= to number of rows in source");
 
diff --git a/cpp/include/cudf/detail/scatter.hpp b/cpp/include/cudf/detail/scatter.hpp
index 8c993368ff2..801088b803c 100644
--- a/cpp/include/cudf/detail/scatter.hpp
+++ b/cpp/include/cudf/detail/scatter.hpp
@@ -45,10 +45,8 @@ namespace detail {
  *
  * If the same index appears more than once in the scatter map, the result is
  * undefined.
- *
- * @throws cudf::logic_error if `check_bounds == true` and an index exists in
- * `scatter_map` outside the range `[-n, n)`, where `n` is the number of rows in
- * the target table. If `check_bounds == false`, the behavior is undefined.
+ * If any values in `scatter_map` are outside of the interval [-n, n) where `n`
+ * is the number of rows in the `target` table, behavior is undefined.
  *
  * @param source The input columns containing values to be scattered into the
  * target columns
@@ -57,8 +55,6 @@ namespace detail {
  * to or less than the number of elements in the source columns.
  * @param target The set of columns into which values from the source_table
  * are to be scattered
- * @param check_bounds Optionally perform bounds checking on the values of
- * `scatter_map` and throw an error if any of its values are out of bounds.
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @param mr Device memory resource used to allocate the returned table's device memory
  * @return Result of scattering values from source to target
@@ -67,7 +63,6 @@ std::unique_ptr<table> scatter(
   table_view const& source,
   column_view const& scatter_map,
   table_view const& target,
-  bool check_bounds                   = false,
   rmm::cuda_stream_view stream        = cudf::default_stream_value,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
@@ -81,7 +76,6 @@ std::unique_ptr<table> scatter(
   table_view const& source,
   device_span<size_type const> const scatter_map,
   table_view const& target,
-  bool check_bounds                   = false,
   rmm::cuda_stream_view stream        = cudf::default_stream_value,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
@@ -101,9 +95,8 @@ std::unique_ptr<table> scatter(
  * If the same index appears more than once in the scatter map, the result is
  * undefined.
  *
- * @throws cudf::logic_error if `check_bounds == true` and an index exists in
- * `scatter_map` outside the range `[-n, n)`, where `n` is the number of rows in
- * the target table. If `check_bounds == false`, the behavior is undefined.
+ * If any values in `indices` are outside of the interval [-n, n) where `n`
+ * is the number of rows in the `target` table, behavior is undefined.
  *
  * @param source The input scalars containing values to be scattered into the
  * target columns
@@ -111,8 +104,6 @@ std::unique_ptr<table> scatter(
  * the rows in the target table to be replaced by source.
  * @param target The set of columns into which values from the source_table
  * are to be scattered
- * @param check_bounds Optionally perform bounds checking on the values of
- * `scatter_map` and throw an error if any of its values are out of bounds.
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @param mr Device memory resource used to allocate the returned table's device memory
  * @return Result of scattering values from source to target
@@ -121,7 +112,6 @@ std::unique_ptr<table> scatter(
   std::vector<std::reference_wrapper<const scalar>> const& source,
   column_view const& indices,
   table_view const& target,
-  bool check_bounds                   = false,
   rmm::cuda_stream_view stream        = cudf::default_stream_value,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
diff --git a/cpp/include/cudf/filling.hpp b/cpp/include/cudf/filling.hpp
index 5f9d13f9a2c..8688e97ab7e 100644
--- a/cpp/include/cudf/filling.hpp
+++ b/cpp/include/cudf/filling.hpp
@@ -103,26 +103,22 @@ std::unique_ptr<column> fill(
  * ```
  * @p count should not have null values; should not contain negative values;
  * and the sum of count elements should not overflow the size_type's limit.
- * It is undefined behavior if @p count has negative values or the sum overflows
- * and @p check_count is set to false.
+ * The behavior of this function is undefined if @p count has negative values
+ * or the sum overflows.
  *
  * @throws cudf::logic_error if the data type of @p count is not size_type.
  * @throws cudf::logic_error if @p input_table and @p count have different
  * number of rows.
  * @throws cudf::logic_error if @p count has null values.
- * @throws cudf::logic_error if @p check_count is set to true and @p count
- * has negative values or the sum of @p count elements overflows.
  *
  * @param input_table Input table
  * @param count Non-nullable column of an integral type
- * @param check_count Whether to check count (negative values and overflow)
  * @param mr Device memory resource used to allocate the returned table's device memory
  * @return The result table containing the repetitions
  */
 std::unique_ptr<table> repeat(
   table_view const& input_table,
   column_view const& count,
-  bool check_count                    = false,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
diff --git a/cpp/src/copying/copy.cu b/cpp/src/copying/copy.cu
index 5585eac923c..7e5b9288628 100644
--- a/cpp/src/copying/copy.cu
+++ b/cpp/src/copying/copy.cu
@@ -180,7 +180,6 @@ std::unique_ptr<column> scatter_gather_based_if_else(cudf::column_view const& lh
     table_view{std::vector<column_view>{scatter_src_lhs->get_column(0).view()}},
     gather_map,
     table_view{std::vector<column_view>{rhs}},
-    false,
     stream,
     mr);
 
@@ -208,12 +207,8 @@ std::unique_ptr<column> scatter_gather_based_if_else(cudf::scalar const& lhs,
                                                    static_cast<cudf::size_type>(scatter_map_size),
                                                    scatter_map.begin()};
 
-  auto result = cudf::detail::scatter(scatter_source,
-                                      scatter_map_column_view,
-                                      table_view{std::vector<column_view>{rhs}},
-                                      false,
-                                      stream,
-                                      mr);
+  auto result = cudf::detail::scatter(
+    scatter_source, scatter_map_column_view, table_view{std::vector<column_view>{rhs}}, stream, mr);
 
   return std::move(result->release()[0]);
 }
diff --git a/cpp/src/copying/scatter.cu b/cpp/src/copying/scatter.cu
index 79c27816009..63711a43c3b 100644
--- a/cpp/src/copying/scatter.cu
+++ b/cpp/src/copying/scatter.cu
@@ -285,7 +285,6 @@ struct column_scalar_scatterer_impl<struct_view, MapIterator> {
 std::unique_ptr<table> scatter(table_view const& source,
                                column_view const& scatter_map,
                                table_view const& target,
-                               bool check_bounds,
                                rmm::cuda_stream_view stream,
                                rmm::mr::device_memory_resource* mr)
 {
@@ -307,13 +306,12 @@ std::unique_ptr<table> scatter(table_view const& source,
   // create index type normalizing iterator for the scatter_map
   auto map_begin = indexalator_factory::make_input_iterator(scatter_map);
   auto map_end   = map_begin + scatter_map.size();
-  return detail::scatter(source, map_begin, map_end, target, check_bounds, stream, mr);
+  return detail::scatter(source, map_begin, map_end, target, stream, mr);
 }
 
 std::unique_ptr<table> scatter(table_view const& source,
                                device_span<size_type const> const scatter_map,
                                table_view const& target,
-                               bool check_bounds,
                                rmm::cuda_stream_view stream,
                                rmm::mr::device_memory_resource* mr)
 {
@@ -322,13 +320,12 @@ std::unique_ptr<table> scatter(table_view const& source,
   auto map_col = column_view(data_type{type_to_id<size_type>()},
                              static_cast<size_type>(scatter_map.size()),
                              scatter_map.data());
-  return scatter(source, map_col, target, check_bounds, stream, mr);
+  return scatter(source, map_col, target, stream, mr);
 }
 
 std::unique_ptr<table> scatter(std::vector<std::reference_wrapper<const scalar>> const& source,
                                column_view const& indices,
                                table_view const& target,
-                               bool check_bounds,
                                rmm::cuda_stream_view stream,
                                rmm::mr::device_memory_resource* mr)
 {
@@ -340,20 +337,9 @@ std::unique_ptr<table> scatter(std::vector<std::reference_wrapper<const scalar>>
 
   // Create normalizing iterator for indices column
   auto map_begin = indexalator_factory::make_input_iterator(indices);
-  auto map_end   = map_begin + indices.size();
 
   // Optionally check map index values are within the number of target rows.
   auto const n_rows = target.num_rows();
-  if (check_bounds) {
-    CUDF_EXPECTS(
-      indices.size() == thrust::count_if(rmm::exec_policy(stream),
-                                         map_begin,
-                                         map_end,
-                                         [n_rows] __device__(size_type index) {
-                                           return ((index >= -n_rows) && (index < n_rows));
-                                         }),
-      "Scatter map index out of bounds");
-  }
 
   // Transform negative indices to index + target size
   auto scatter_rows = indices.size();
@@ -404,12 +390,8 @@ std::unique_ptr<column> boolean_mask_scatter(column_view const& input,
   // The scatter map is actually a table with only one column, which is scatter map.
   auto scatter_map =
     detail::apply_boolean_mask(table_view{{indices->view()}}, boolean_mask, stream);
-  auto output_table = detail::scatter(table_view{{input}},
-                                      scatter_map->get_column(0).view(),
-                                      table_view{{target}},
-                                      false,
-                                      stream,
-                                      mr);
+  auto output_table = detail::scatter(
+    table_view{{input}}, scatter_map->get_column(0).view(), table_view{{target}}, stream, mr);
 
   // There is only one column in output_table
   return std::make_unique<column>(std::move(output_table->get_column(0)));
@@ -505,21 +487,19 @@ std::unique_ptr<table> boolean_mask_scatter(
 std::unique_ptr<table> scatter(table_view const& source,
                                column_view const& scatter_map,
                                table_view const& target,
-                               bool check_bounds,
                                rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::scatter(source, scatter_map, target, check_bounds, cudf::default_stream_value, mr);
+  return detail::scatter(source, scatter_map, target, cudf::default_stream_value, mr);
 }
 
 std::unique_ptr<table> scatter(std::vector<std::reference_wrapper<const scalar>> const& source,
                                column_view const& indices,
                                table_view const& target,
-                               bool check_bounds,
                                rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::scatter(source, indices, target, check_bounds, cudf::default_stream_value, mr);
+  return detail::scatter(source, indices, target, cudf::default_stream_value, mr);
 }
 
 std::unique_ptr<table> boolean_mask_scatter(table_view const& input,
diff --git a/cpp/src/filling/repeat.cu b/cpp/src/filling/repeat.cu
index 90c644933ec..b2587e67350 100644
--- a/cpp/src/filling/repeat.cu
+++ b/cpp/src/filling/repeat.cu
@@ -103,7 +103,6 @@ namespace cudf {
 namespace detail {
 std::unique_ptr<table> repeat(table_view const& input_table,
                               column_view const& count,
-                              bool check_count,
                               rmm::cuda_stream_view stream,
                               rmm::mr::device_memory_resource* mr)
 {
@@ -112,19 +111,12 @@ std::unique_ptr<table> repeat(table_view const& input_table,
 
   if (input_table.num_rows() == 0) { return cudf::empty_like(input_table); }
 
-  if (check_count) { cudf::type_dispatcher(count.type(), count_checker{count}, stream); }
-
   auto count_iter = cudf::detail::indexalator_factory::make_input_iterator(count);
 
   rmm::device_uvector<cudf::size_type> offsets(count.size(), stream);
   thrust::inclusive_scan(
     rmm::exec_policy(stream), count_iter, count_iter + count.size(), offsets.begin());
 
-  if (check_count) {
-    CUDF_EXPECTS(thrust::is_sorted(rmm::exec_policy(stream), offsets.begin(), offsets.end()),
-                 "count has negative values or the resulting table has too many rows.");
-  }
-
   size_type output_size{offsets.back_element(stream)};
   rmm::device_uvector<size_type> indices(output_size, stream);
   thrust::upper_bound(rmm::exec_policy(stream),
@@ -162,11 +154,10 @@ std::unique_ptr<table> repeat(table_view const& input_table,
 
 std::unique_ptr<table> repeat(table_view const& input_table,
                               column_view const& count,
-                              bool check_count,
                               rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::repeat(input_table, count, check_count, cudf::default_stream_value, mr);
+  return detail::repeat(input_table, count, cudf::default_stream_value, mr);
 }
 
 std::unique_ptr<table> repeat(table_view const& input_table,
diff --git a/cpp/src/groupby/sort/scan.cpp b/cpp/src/groupby/sort/scan.cpp
index 5d345273782..743ca5e8065 100644
--- a/cpp/src/groupby/sort/scan.cpp
+++ b/cpp/src/groupby/sort/scan.cpp
@@ -178,9 +178,9 @@ void scan_result_functor::operator()<aggregation::RANK>(aggregation const& agg)
                                               stream,
                                               mr);
   }
-  result = std::move(cudf::detail::scatter(
-                       table_view{{*result}}, *gather_map, table_view{{*result}}, false, stream, mr)
-                       ->release()[0]);
+  result = std::move(
+    cudf::detail::scatter(table_view{{*result}}, *gather_map, table_view{{*result}}, stream, mr)
+      ->release()[0]);
   if (rank_agg._null_handling == null_policy::EXCLUDE) {
     result->set_null_mask(cudf::detail::copy_bitmask(get_grouped_values(), stream, mr));
   }
diff --git a/cpp/src/groupby/sort/sort_helper.cu b/cpp/src/groupby/sort/sort_helper.cu
index a0abaf71160..53ab65e9be7 100644
--- a/cpp/src/groupby/sort/sort_helper.cu
+++ b/cpp/src/groupby/sort/sort_helper.cu
@@ -244,7 +244,6 @@ column_view sort_groupby_helper::unsorted_keys_labels(rmm::cuda_stream_view stre
     cudf::detail::scatter(table_view({group_labels_view}),
                           scatter_map,
                           table_view({temp_labels->view()}),
-                          false,
                           stream,
                           rmm::mr::get_current_device_resource());
 
diff --git a/cpp/src/partitioning/partitioning.cu b/cpp/src/partitioning/partitioning.cu
index 3e0cc26dcdd..296a9f40fbb 100644
--- a/cpp/src/partitioning/partitioning.cu
+++ b/cpp/src/partitioning/partitioning.cu
@@ -610,7 +610,7 @@ std::pair<std::unique_ptr<table>, std::vector<size_type>> hash_partition_table(
 
     // Use the resulting scatter map to materialize the output
     auto output = detail::scatter(
-      input, row_partition_numbers.begin(), row_partition_numbers.end(), input, false, stream, mr);
+      input, row_partition_numbers.begin(), row_partition_numbers.end(), input, stream, mr);
 
     stream.synchronize();  // Async D2H copy must finish before returning host vec
     return std::pair(std::move(output), std::move(partition_offsets));
@@ -698,7 +698,7 @@ struct dispatch_map_type {
 
     // Scatter the rows into their partitions
     auto scattered =
-      cudf::detail::scatter(t, scatter_map.begin(), scatter_map.end(), t, false, stream, mr);
+      cudf::detail::scatter(t, scatter_map.begin(), scatter_map.end(), t, stream, mr);
 
     return std::pair(std::move(scattered), std::move(partition_offsets));
   }
diff --git a/cpp/src/rolling/detail/lead_lag_nested.cuh b/cpp/src/rolling/detail/lead_lag_nested.cuh
index a23786ec7f3..859ed7e5d53 100644
--- a/cpp/src/rolling/detail/lead_lag_nested.cuh
+++ b/cpp/src/rolling/detail/lead_lag_nested.cuh
@@ -198,7 +198,6 @@ std::unique_ptr<column> compute_lead_lag_for_nested(aggregation::Kind op,
     table_view{std::vector<column_view>{gathered_defaults->release()[0]->view()}},
     scatter_map,
     table_view{std::vector<column_view>{output_with_nulls->release()[0]->view()}},
-    false,
     stream,
     mr);
   return std::move(scattered_results->release()[0]);
diff --git a/cpp/tests/copying/scatter_list_scalar_tests.cpp b/cpp/tests/copying/scatter_list_scalar_tests.cpp
index 7d3de9b6c15..40b5dcff7b6 100644
--- a/cpp/tests/copying/scatter_list_scalar_tests.cpp
+++ b/cpp/tests/copying/scatter_list_scalar_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -38,7 +38,7 @@ std::unique_ptr<column> single_scalar_scatter(column_view const& target,
 {
   std::vector<std::reference_wrapper<const scalar>> slrs{slr};
   table_view targets{{target}};
-  auto result = scatter(slrs, scatter_map, targets, true);
+  auto result = scatter(slrs, scatter_map, targets);
   return std::move(result->release()[0]);
 }
 
diff --git a/cpp/tests/copying/scatter_struct_scalar_tests.cpp b/cpp/tests/copying/scatter_struct_scalar_tests.cpp
index 44e65110f33..62201224893 100644
--- a/cpp/tests/copying/scatter_struct_scalar_tests.cpp
+++ b/cpp/tests/copying/scatter_struct_scalar_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -41,7 +41,7 @@ TYPED_TEST_SUITE(TypedStructScalarScatterTest, FixedWidthTypes);
 
 column scatter_single_scalar(scalar const& slr, column_view scatter_map, column_view target)
 {
-  auto result = scatter({slr}, scatter_map, table_view{{target}}, false);
+  auto result = scatter({slr}, scatter_map, table_view{{target}});
   return result->get_column(0);
 }
 
diff --git a/cpp/tests/copying/scatter_tests.cpp b/cpp/tests/copying/scatter_tests.cpp
index 306ab8a3d5c..f853920e24c 100644
--- a/cpp/tests/copying/scatter_tests.cpp
+++ b/cpp/tests/copying/scatter_tests.cpp
@@ -39,7 +39,7 @@ TEST_F(ScatterUntypedTests, ScatterMapTooLong)
   auto const source_table = cudf::table_view({source, source});
   auto const target_table = cudf::table_view({target, target});
 
-  EXPECT_THROW(cudf::scatter(source_table, scatter_map, target_table, true), cudf::logic_error);
+  EXPECT_THROW(cudf::scatter(source_table, scatter_map, target_table), cudf::logic_error);
 }
 
 // Throw logic error if scatter map has nulls
@@ -54,7 +54,7 @@ TEST_F(ScatterUntypedTests, ScatterMapNulls)
   auto const source_table = cudf::table_view({source, source});
   auto const target_table = cudf::table_view({target, target});
 
-  EXPECT_THROW(cudf::scatter(source_table, scatter_map, target_table, true), cudf::logic_error);
+  EXPECT_THROW(cudf::scatter(source_table, scatter_map, target_table), cudf::logic_error);
 }
 
 // Throw logic error if scatter map has nulls
@@ -72,7 +72,7 @@ TEST_F(ScatterUntypedTests, ScatterScalarMapNulls)
 
   auto const target_table = cudf::table_view({target});
 
-  EXPECT_THROW(cudf::scatter(source_vector, scatter_map, target_table, true), cudf::logic_error);
+  EXPECT_THROW(cudf::scatter(source_vector, scatter_map, target_table), cudf::logic_error);
 }
 
 // Throw logic error if source and target have different number of columns
@@ -87,7 +87,7 @@ TEST_F(ScatterUntypedTests, ScatterColumnNumberMismatch)
   auto const source_table = cudf::table_view({source});
   auto const target_table = cudf::table_view({target, target});
 
-  EXPECT_THROW(cudf::scatter(source_table, scatter_map, target_table, true), cudf::logic_error);
+  EXPECT_THROW(cudf::scatter(source_table, scatter_map, target_table), cudf::logic_error);
 }
 
 // Throw logic error if number of scalars doesn't match number of columns
@@ -105,7 +105,7 @@ TEST_F(ScatterUntypedTests, ScatterScalarColumnNumberMismatch)
 
   auto const target_table = cudf::table_view({target, target});
 
-  EXPECT_THROW(cudf::scatter(source_vector, scatter_map, target_table, true), cudf::logic_error);
+  EXPECT_THROW(cudf::scatter(source_vector, scatter_map, target_table), cudf::logic_error);
 }
 
 // Throw logic error if source and target have different data types
@@ -120,7 +120,7 @@ TEST_F(ScatterUntypedTests, ScatterDataTypeMismatch)
   auto const source_table = cudf::table_view({source});
   auto const target_table = cudf::table_view({target});
 
-  EXPECT_THROW(cudf::scatter(source_table, scatter_map, target_table, true), cudf::logic_error);
+  EXPECT_THROW(cudf::scatter(source_table, scatter_map, target_table), cudf::logic_error);
 }
 
 // Throw logic error if source and target have different data types
@@ -138,7 +138,7 @@ TEST_F(ScatterUntypedTests, ScatterScalarDataTypeMismatch)
 
   auto const target_table = cudf::table_view({target});
 
-  EXPECT_THROW(cudf::scatter(source_vector, scatter_map, target_table, true), cudf::logic_error);
+  EXPECT_THROW(cudf::scatter(source_vector, scatter_map, target_table), cudf::logic_error);
 }
 
 template <typename T>
@@ -148,43 +148,6 @@ class ScatterIndexTypeTests : public cudf::test::BaseFixture {
 using IndexTypes = cudf::test::Types<int8_t, int16_t, int32_t, int64_t>;
 TYPED_TEST_SUITE(ScatterIndexTypeTests, IndexTypes);
 
-// Throw logic error if check_bounds is set and index is out of bounds
-TYPED_TEST(ScatterIndexTypeTests, ScatterOutOfBounds)
-{
-  using cudf::test::fixed_width_column_wrapper;
-
-  fixed_width_column_wrapper<TypeParam> source({1, 2, 3, 4, 5, 6});
-  fixed_width_column_wrapper<TypeParam> target({10, 20, 30, 40, 50, 60, 70, 80});
-  fixed_width_column_wrapper<TypeParam> upper_bound({-3, 3, 1, 8});
-  fixed_width_column_wrapper<TypeParam> lower_bound({-3, 3, 1, -9});
-
-  auto const source_table = cudf::table_view({source, source});
-  auto const target_table = cudf::table_view({target, target});
-
-  EXPECT_THROW(cudf::scatter(source_table, upper_bound, target_table, true), cudf::logic_error);
-  EXPECT_THROW(cudf::scatter(source_table, lower_bound, target_table, true), cudf::logic_error);
-}
-
-// Throw logic error if check_bounds is set and index is out of bounds
-TYPED_TEST(ScatterIndexTypeTests, ScatterScalarOutOfBounds)
-{
-  using cudf::scalar_type_t;
-  using cudf::test::fixed_width_column_wrapper;
-
-  auto const source = scalar_type_t<TypeParam>(100, true);
-  std::reference_wrapper<const cudf::scalar> slr_ref{source};
-  std::vector<std::reference_wrapper<const cudf::scalar>> source_vector{slr_ref};
-
-  fixed_width_column_wrapper<TypeParam> target({10, 20, 30, 40, 50, 60, 70, 80});
-  fixed_width_column_wrapper<TypeParam> upper_bound({-3, 3, 1, 8});
-  fixed_width_column_wrapper<TypeParam> lower_bound({-3, 3, 1, -9});
-
-  auto const target_table = cudf::table_view({target});
-
-  EXPECT_THROW(cudf::scatter(source_vector, upper_bound, target_table, true), cudf::logic_error);
-  EXPECT_THROW(cudf::scatter(source_vector, lower_bound, target_table, true), cudf::logic_error);
-}
-
 // Validate that each of the index types work
 TYPED_TEST(ScatterIndexTypeTests, ScatterIndexType)
 {
@@ -199,7 +162,7 @@ TYPED_TEST(ScatterIndexTypeTests, ScatterIndexType)
   auto const target_table   = cudf::table_view({target, target});
   auto const expected_table = cudf::table_view({expected, expected});
 
-  auto const result = cudf::scatter(source_table, scatter_map, target_table, true);
+  auto const result = cudf::scatter(source_table, scatter_map, target_table);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table);
 }
@@ -221,7 +184,7 @@ TYPED_TEST(ScatterIndexTypeTests, ScatterScalarIndexType)
   auto const target_table   = cudf::table_view({target});
   auto const expected_table = cudf::table_view({expected});
 
-  auto const result = cudf::scatter(source_vector, scatter_map, target_table, true);
+  auto const result = cudf::scatter(source_vector, scatter_map, target_table);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table);
 }
@@ -248,7 +211,7 @@ TYPED_TEST(ScatterInvalidIndexTypeTests, ScatterInvalidIndexType)
   auto const source_table = cudf::table_view({source, source});
   auto const target_table = cudf::table_view({target, target});
 
-  EXPECT_THROW(cudf::scatter(source_table, scatter_map, target_table, true), cudf::logic_error);
+  EXPECT_THROW(cudf::scatter(source_table, scatter_map, target_table), cudf::logic_error);
 }
 
 // Throw logic error if scatter map column has invalid data type
@@ -266,7 +229,7 @@ TYPED_TEST(ScatterInvalidIndexTypeTests, ScatterScalarInvalidIndexType)
 
   auto const target_table = cudf::table_view({target});
 
-  EXPECT_THROW(cudf::scatter(source_vector, scatter_map, target_table, true), cudf::logic_error);
+  EXPECT_THROW(cudf::scatter(source_vector, scatter_map, target_table), cudf::logic_error);
 }
 
 template <typename T>
@@ -287,7 +250,7 @@ TYPED_TEST(ScatterDataTypeTests, EmptyScatterMap)
   auto const source_table = cudf::table_view({source, source});
   auto const target_table = cudf::table_view({target, target});
 
-  auto const result = cudf::scatter(source_table, scatter_map, target_table, true);
+  auto const result = cudf::scatter(source_table, scatter_map, target_table);
 
   // Expect a copy of the input table
   CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), target_table);
@@ -309,7 +272,7 @@ TYPED_TEST(ScatterDataTypeTests, EmptyScalarScatterMap)
 
   auto const target_table = cudf::table_view({target});
 
-  auto const result = cudf::scatter(source_vector, scatter_map, target_table, true);
+  auto const result = cudf::scatter(source_vector, scatter_map, target_table);
 
   // Expect a copy of the input table
   CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), target_table);
@@ -328,7 +291,7 @@ TYPED_TEST(ScatterDataTypeTests, ScatterNoNulls)
   auto const target_table   = cudf::table_view({target, target});
   auto const expected_table = cudf::table_view({expected, expected});
 
-  auto const result = cudf::scatter(source_table, scatter_map, target_table, true);
+  auto const result = cudf::scatter(source_table, scatter_map, target_table);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table);
 }
@@ -348,7 +311,7 @@ TYPED_TEST(ScatterDataTypeTests, ScatterBothNulls)
   auto const target_table   = cudf::table_view({target, target});
   auto const expected_table = cudf::table_view({expected, expected});
 
-  auto const result = cudf::scatter(source_table, scatter_map, target_table, true);
+  auto const result = cudf::scatter(source_table, scatter_map, target_table);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table);
 }
@@ -367,7 +330,7 @@ TYPED_TEST(ScatterDataTypeTests, ScatterSourceNulls)
   auto const target_table   = cudf::table_view({target, target});
   auto const expected_table = cudf::table_view({expected, expected});
 
-  auto const result = cudf::scatter(source_table, scatter_map, target_table, true);
+  auto const result = cudf::scatter(source_table, scatter_map, target_table);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table);
 }
@@ -387,7 +350,7 @@ TYPED_TEST(ScatterDataTypeTests, ScatterTargetNulls)
   auto const target_table   = cudf::table_view({target, target});
   auto const expected_table = cudf::table_view({expected, expected});
 
-  auto const result = cudf::scatter(source_table, scatter_map, target_table, true);
+  auto const result = cudf::scatter(source_table, scatter_map, target_table);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table);
 }
@@ -409,7 +372,7 @@ TYPED_TEST(ScatterDataTypeTests, ScatterScalarNoNulls)
   auto const target_table   = cudf::table_view({target});
   auto const expected_table = cudf::table_view({expected});
 
-  auto const result = cudf::scatter(source_vector, scatter_map, target_table, true);
+  auto const result = cudf::scatter(source_vector, scatter_map, target_table);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table);
 }
@@ -433,7 +396,7 @@ TYPED_TEST(ScatterDataTypeTests, ScatterScalarTargetNulls)
   auto const target_table   = cudf::table_view({target});
   auto const expected_table = cudf::table_view({expected});
 
-  auto const result = cudf::scatter(source_vector, scatter_map, target_table, true);
+  auto const result = cudf::scatter(source_vector, scatter_map, target_table);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table);
 }
@@ -457,7 +420,7 @@ TYPED_TEST(ScatterDataTypeTests, ScatterScalarSourceNulls)
   auto const target_table   = cudf::table_view({target});
   auto const expected_table = cudf::table_view({expected});
 
-  auto const result = cudf::scatter(source_vector, scatter_map, target_table, true);
+  auto const result = cudf::scatter(source_vector, scatter_map, target_table);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table);
 }
@@ -482,7 +445,7 @@ TYPED_TEST(ScatterDataTypeTests, ScatterScalarBothNulls)
   auto const target_table   = cudf::table_view({target});
   auto const expected_table = cudf::table_view({expected});
 
-  auto const result = cudf::scatter(source_vector, scatter_map, target_table, true);
+  auto const result = cudf::scatter(source_vector, scatter_map, target_table);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table);
 }
@@ -510,7 +473,7 @@ TYPED_TEST(ScatterDataTypeTests, ScatterSourceNullsLarge)
   auto const target_table   = cudf::table_view({target, target});
   auto const expected_table = cudf::table_view({expected, expected});
 
-  auto const result = cudf::scatter(source_table, scatter_map, target_table, true);
+  auto const result = cudf::scatter(source_table, scatter_map, target_table);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table);
 }
@@ -540,7 +503,7 @@ TEST_F(ScatterStringsTests, ScatterNoNulls)
   auto const target_table   = cudf::table_view({target, target});
   auto const expected_table = cudf::table_view({expected, expected});
 
-  auto const result = cudf::scatter(source_table, scatter_map, target_table, true);
+  auto const result = cudf::scatter(source_table, scatter_map, target_table);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table);
 }
@@ -568,7 +531,7 @@ TEST_F(ScatterStringsTests, ScatterScalarNoNulls)
   auto const target_table   = cudf::table_view({target});
   auto const expected_table = cudf::table_view({expected});
 
-  auto const result = cudf::scatter(source_vector, scatter_map, target_table, true);
+  auto const result = cudf::scatter(source_vector, scatter_map, target_table);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(result->view(), expected_table);
 }
@@ -937,7 +900,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointScatter)
   auto const target_table   = cudf::table_view({target, target});
   auto const expected_table = cudf::table_view({expected, expected});
 
-  auto const result = cudf::scatter(source_table, scatter_map, target_table, true);
+  auto const result = cudf::scatter(source_table, scatter_map, target_table);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected_table, result->view());
 }
diff --git a/cpp/tests/filling/repeat_tests.cpp b/cpp/tests/filling/repeat_tests.cpp
index 7d30298b1bd..df8dceb0f8d 100644
--- a/cpp/tests/filling/repeat_tests.cpp
+++ b/cpp/tests/filling/repeat_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -272,24 +272,3 @@ TEST_F(RepeatErrorTestFixture, CountHasNulls)
   // input_table.has_nulls() == true
   EXPECT_THROW(auto ret = cudf::repeat(input_table, count), cudf::logic_error);
 }
-
-TEST_F(RepeatErrorTestFixture, NegativeCountOrOverflow)
-{
-  auto input = cudf::test::fixed_width_column_wrapper<int32_t>(
-    thrust::make_counting_iterator(0), thrust::make_counting_iterator(0) + 100);
-
-  auto count_neg = cudf::test::fixed_width_column_wrapper<cudf::size_type>(
-    thrust::make_constant_iterator(-1, 0), thrust::make_constant_iterator(-1, 100));
-
-  auto value          = std::numeric_limits<cudf::size_type>::max() / 10;
-  auto count_overflow = cudf::test::fixed_width_column_wrapper<cudf::size_type>(
-    thrust::make_constant_iterator(value, 0), thrust::make_constant_iterator(value, 100));
-
-  cudf::table_view input_table{{input}};
-
-  // negative
-  EXPECT_THROW(auto p_ret = cudf::repeat(input_table, count_neg, true), cudf::logic_error);
-
-  // overflow
-  EXPECT_THROW(auto p_ret = cudf::repeat(input_table, count_overflow, true), cudf::logic_error);
-}
diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java
index dbc2a28c38c..c6f606e971d 100644
--- a/java/src/main/java/ai/rapids/cudf/Table.java
+++ b/java/src/main/java/ai/rapids/cudf/Table.java
@@ -706,10 +706,10 @@ private static native long[] dropDuplicates(long nativeHandle, int[] keyColumns,
   private static native long[] gather(long tableHandle, long gatherView, boolean checkBounds);
 
   private static native long[] scatterTable(long srcTableHandle, long scatterView,
-                                            long targetTableHandle, boolean checkBounds)
+                                            long targetTableHandle)
                                             throws CudfException;
   private static native long[] scatterScalars(long[] srcScalarHandles, long scatterView,
-                                             long targetTableHandle, boolean checkBounds)
+                                             long targetTableHandle)
                                              throws CudfException;
 
   private static native long[] convertToRows(long nativeHandle);
@@ -723,8 +723,7 @@ private static native long[] scatterScalars(long[] srcScalarHandles, long scatte
   private static native long[] repeatStaticCount(long tableHandle, int count);
 
   private static native long[] repeatColumnCount(long tableHandle,
-                                                 long columnHandle,
-                                                 boolean checkCount);
+                                                 long columnHandle);
 
   private static native long rowBitCount(long tableHandle) throws CudfException;
 
@@ -1686,22 +1685,7 @@ public Table repeat(int count) {
    * @throws CudfException on any error.
    */
   public Table repeat(ColumnView counts) {
-    return repeat(counts, true);
-  }
-
-  /**
-   * Create a new table by repeating each row of this table. The number of
-   * repetitions of each row is defined by the corresponding value in counts.
-   * @param counts the number of times to repeat each row. Cannot have nulls, must be an
-   *               Integer type, and must have one entry for each row in the table.
-   * @param checkCount should counts be checked for errors before processing. Be careful if you
-   *                   disable this because if you pass in bad data you might just get back an
-   *                   empty table or bad data.
-   * @return the new Table.
-   * @throws CudfException on any error.
-   */
-  public Table repeat(ColumnView counts, boolean checkCount) {
-    return new Table(repeatColumnCount(this.nativeHandle, counts.getNativeView(), checkCount));
+    return new Table(repeatColumnCount(this.nativeHandle, counts.getNativeView()));
   }
 
   /**
@@ -2349,14 +2333,11 @@ public Table gather(ColumnView gatherMap, OutOfBoundsPolicy outOfBoundsPolicy) {
    *
    * @param scatterMap The map of indexes. Must be non-nullable and integral type.
    * @param target The table into which rows from the current table are to be scattered out-of-place.
-   * @param checkBounds Optionally perform bounds checking on the values of`scatterMap` and throw
-   *                    an exception if any of its values are out of bounds.
    * @return A new table which is the result of out-of-place scattering the source table into the
    *         target table.
    */
-  public Table scatter(ColumnView scatterMap, Table target, boolean checkBounds) {
-    return new Table(scatterTable(nativeHandle, scatterMap.getNativeView(), target.getNativeView(),
-        checkBounds));
+  public Table scatter(ColumnView scatterMap, Table target) {
+    return new Table(scatterTable(nativeHandle, scatterMap.getNativeView(), target.getNativeView()));
   }
 
   /**
@@ -2376,20 +2357,17 @@ public Table scatter(ColumnView scatterMap, Table target, boolean checkBounds) {
    * @param source The input scalars containing values to be scattered into the target table.
    * @param scatterMap The map of indexes. Must be non-nullable and integral type.
    * @param target The table into which the values from source are to be scattered out-of-place.
-   * @param checkBounds Optionally perform bounds checking on the values of`scatterMap` and throw
-   *                    an exception if any of its values are out of bounds.
    * @return A new table which is the result of out-of-place scattering the source values into the
    *         target table.
    */
-  public static Table scatter(Scalar[] source, ColumnView scatterMap, Table target,
-                              boolean checkBounds) {
+  public static Table scatter(Scalar[] source, ColumnView scatterMap, Table target) {
     long[] srcScalarHandles = new long[source.length];
     for(int i = 0; i < source.length; ++i) {
       assert source[i] != null : "Scalar vectors passed in should not contain null";
       srcScalarHandles[i] = source[i].getScalarHandle();
     }
     return new Table(scatterScalars(srcScalarHandles, scatterMap.getNativeView(),
-        target.getNativeView(), checkBounds));
+        target.getNativeView()));
   }
 
   private static GatherMap[] buildJoinGatherMaps(long[] gatherMapData) {
diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp
index c23c5a3ccb2..cbd0aee335e 100644
--- a/java/src/main/native/src/TableJni.cpp
+++ b/java/src/main/native/src/TableJni.cpp
@@ -2979,8 +2979,7 @@ Java_ai_rapids_cudf_Table_convertToRowsFixedWidthOptimized(JNIEnv *env, jclass,
 
 JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_scatterTable(JNIEnv *env, jclass,
                                                                     jlong j_input, jlong j_map,
-                                                                    jlong j_target,
-                                                                    jboolean check_bounds) {
+                                                                    jlong j_target) {
   JNI_NULL_CHECK(env, j_input, "input table is null", 0);
   JNI_NULL_CHECK(env, j_map, "map column is null", 0);
   JNI_NULL_CHECK(env, j_target, "target table is null", 0);
@@ -2989,15 +2988,14 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_scatterTable(JNIEnv *env,
     auto const input = reinterpret_cast<cudf::table_view const *>(j_input);
     auto const map = reinterpret_cast<cudf::column_view const *>(j_map);
     auto const target = reinterpret_cast<cudf::table_view const *>(j_target);
-    return convert_table_for_return(env, cudf::scatter(*input, *map, *target, check_bounds));
+    return convert_table_for_return(env, cudf::scatter(*input, *map, *target));
   }
   CATCH_STD(env, 0);
 }
 
 JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_scatterScalars(JNIEnv *env, jclass,
                                                                       jlongArray j_input,
-                                                                      jlong j_map, jlong j_target,
-                                                                      jboolean check_bounds) {
+                                                                      jlong j_map, jlong j_target) {
   JNI_NULL_CHECK(env, j_input, "input scalars array is null", 0);
   JNI_NULL_CHECK(env, j_map, "map column is null", 0);
   JNI_NULL_CHECK(env, j_target, "target table is null", 0);
@@ -3009,7 +3007,7 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_scatterScalars(JNIEnv *en
                    [](auto &scalar) { return std::ref(*scalar); });
     auto const map = reinterpret_cast<cudf::column_view const *>(j_map);
     auto const target = reinterpret_cast<cudf::table_view const *>(j_target);
-    return convert_table_for_return(env, cudf::scatter(input, *map, *target, check_bounds));
+    return convert_table_for_return(env, cudf::scatter(input, *map, *target));
   }
   CATCH_STD(env, 0);
 }
@@ -3094,15 +3092,14 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_repeatStaticCount(JNIEnv
 
 JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_repeatColumnCount(JNIEnv *env, jclass,
                                                                          jlong input_jtable,
-                                                                         jlong count_jcol,
-                                                                         jboolean check_count) {
+                                                                         jlong count_jcol) {
   JNI_NULL_CHECK(env, input_jtable, "input table is null", 0);
   JNI_NULL_CHECK(env, count_jcol, "count column is null", 0);
   try {
     cudf::jni::auto_set_device(env);
     auto const input = reinterpret_cast<cudf::table_view const *>(input_jtable);
     auto const count = reinterpret_cast<cudf::column_view const *>(count_jcol);
-    return convert_table_for_return(env, cudf::repeat(*input, *count, check_count));
+    return convert_table_for_return(env, cudf::repeat(*input, *count));
   }
   CATCH_STD(env, 0);
 }
diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java
index f31da054091..f564a55463b 100644
--- a/java/src/test/java/ai/rapids/cudf/TableTest.java
+++ b/java/src/test/java/ai/rapids/cudf/TableTest.java
@@ -2689,17 +2689,6 @@ void testRepeatColumn() {
     }
   }
 
-  @Test
-  void testRepeatColumnBad() {
-    try (Table t = new Table.TestBuilder()
-            .column(1, 2)
-            .column("a", "b")
-            .build();
-         ColumnVector repeats = ColumnVector.fromBytes((byte)2, (byte)-1)) {
-      assertThrows(CudfException.class, () -> t.repeat(repeats));
-    }
-  }
-
   @Test
   void testInterleaveIntColumns() {
     try (Table t = new Table.TestBuilder()
@@ -6963,7 +6952,7 @@ void testScatterTable() {
             .decimal32Column(-3, 1, -2, 2, 4, 3)
             .decimal64Column(-8, 100001L, -200002L, 200002L, 400004L, 300003L)
             .build();
-         Table result = srcTable.scatter(scatterMap, targetTable, false)) {
+         Table result = srcTable.scatter(scatterMap, targetTable)) {
       assertTablesAreEqual(expected, result);
     }
   }
@@ -6981,7 +6970,7 @@ void testScatterScalars() {
             .column(0, -2, 0, -4, 0)
             .column("A", "BB", "A", "BBBB", "A")
             .build();
-         Table result = Table.scatter(new Scalar[] { s1, s2 }, scatterMap, targetTable, false)) {
+         Table result = Table.scatter(new Scalar[] { s1, s2 }, scatterMap, targetTable)) {
        assertTablesAreEqual(expected, result);
      }
   }
diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx
index f1183e008f8..a9cfbbbe223 100644
--- a/python/cudf/cudf/_lib/copying.pyx
+++ b/python/cudf/cudf/_lib/copying.pyx
@@ -194,8 +194,7 @@ def gather(
 
 cdef scatter_scalar(list source_device_slrs,
                     column_view scatter_map,
-                    table_view target_table,
-                    bool bounds_check):
+                    table_view target_table):
     cdef vector[reference_wrapper[constscalar]] c_source
     cdef DeviceScalar d_slr
     cdef unique_ptr[table] c_result
@@ -212,7 +211,6 @@ cdef scatter_scalar(list source_device_slrs,
                 c_source,
                 scatter_map,
                 target_table,
-                bounds_check
             )
         )
 
@@ -221,8 +219,7 @@ cdef scatter_scalar(list source_device_slrs,
 
 cdef scatter_column(list source_columns,
                     column_view scatter_map,
-                    table_view target_table,
-                    bool bounds_check):
+                    table_view target_table):
     cdef table_view c_source = table_view_from_columns(source_columns)
     cdef unique_ptr[table] c_result
 
@@ -232,7 +229,6 @@ cdef scatter_column(list source_columns,
                 c_source,
                 scatter_map,
                 target_table,
-                bounds_check
             )
         )
     return columns_from_unique_ptr(move(c_result))
@@ -257,14 +253,24 @@ def scatter(list sources, Column scatter_map, list target_columns,
     cdef column_view scatter_map_view = scatter_map.view()
     cdef table_view target_table_view = table_view_from_columns(target_columns)
 
+    if bounds_check:
+        n_rows = len(target_columns[0])
+        if not (
+            (scatter_map >= -n_rows).all()
+            and (scatter_map < n_rows).all()
+        ):
+            raise IndexError(
+                f"index out of bounds for column of size {n_rows}"
+            )
+
     if isinstance(sources[0], Column):
         return scatter_column(
-            sources, scatter_map_view, target_table_view, bounds_check
+            sources, scatter_map_view, target_table_view
         )
     else:
         source_scalars = [as_device_scalar(slr) for slr in sources]
         return scatter_scalar(
-            source_scalars, scatter_map_view, target_table_view, bounds_check
+            source_scalars, scatter_map_view, target_table_view
         )
 
 
diff --git a/python/cudf/cudf/_lib/cpp/copying.pxd b/python/cudf/cudf/_lib/cpp/copying.pxd
index a1c433774b5..bc89d364004 100644
--- a/python/cudf/cudf/_lib/cpp/copying.pxd
+++ b/python/cudf/cudf/_lib/cpp/copying.pxd
@@ -44,14 +44,12 @@ cdef extern from "cudf/copying.hpp" namespace "cudf" nogil:
         table_view source_table,
         column_view scatter_map,
         table_view target_table,
-        bool bounds_check
     ) except +
 
     cdef unique_ptr[table] scatter (
         vector[reference_wrapper[constscalar]] source_scalars,
         column_view indices,
         table_view target,
-        bool bounds_check
     ) except +
 
     ctypedef enum mask_allocation_policy:
diff --git a/python/cudf/cudf/_lib/cpp/filling.pxd b/python/cudf/cudf/_lib/cpp/filling.pxd
index 4233ab60ff2..e412f294537 100644
--- a/python/cudf/cudf/_lib/cpp/filling.pxd
+++ b/python/cudf/cudf/_lib/cpp/filling.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
@@ -29,7 +29,6 @@ cdef extern from "cudf/filling.hpp" namespace "cudf" nogil:
     cdef unique_ptr[table] repeat(
         const table_view & input,
         const column_view & count,
-        bool check_count
     ) except +
 
     cdef unique_ptr[table] repeat(
diff --git a/python/cudf/cudf/_lib/filling.pyx b/python/cudf/cudf/_lib/filling.pyx
index 7de63def6a6..592d56158a1 100644
--- a/python/cudf/cudf/_lib/filling.pyx
+++ b/python/cudf/cudf/_lib/filling.pyx
@@ -54,24 +54,23 @@ def fill(Column destination, int begin, int end, DeviceScalar value):
     return Column.from_unique_ptr(move(c_result))
 
 
-def repeat(list inp, object count, bool check_count=False):
+def repeat(list inp, object count):
     if isinstance(count, Column):
-        return _repeat_via_column(inp, count, check_count)
+        return _repeat_via_column(inp, count)
     else:
         return _repeat_via_size_type(inp, count)
 
 
-def _repeat_via_column(list inp, Column count, bool check_count):
+def _repeat_via_column(list inp, Column count):
     cdef table_view c_inp = table_view_from_columns(inp)
     cdef column_view c_count = count.view()
-    cdef bool c_check_count = check_count
+    cdef bool c_check_count = False
     cdef unique_ptr[table] c_result
 
     with nogil:
         c_result = move(cpp_filling.repeat(
             c_inp,
             c_count,
-            c_check_count
         ))
 
     return columns_from_unique_ptr(move(c_result))
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 66ae984ee81..7291b695312 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -571,21 +571,14 @@ def _scatter_by_column(
 
         self._check_scatter_key_length(num_keys, value)
 
-        try:
-            if is_bool_dtype(key.dtype):
-                return libcudf.copying.boolean_mask_scatter(
-                    [value], [self], key
-                )[0]._with_type_metadata(self.dtype)
-            else:
-                return libcudf.copying.scatter([value], key, [self])[
-                    0
-                ]._with_type_metadata(self.dtype)
-        except RuntimeError as e:
-            if "out of bounds" in str(e):
-                raise IndexError(
-                    f"index out of bounds for column of size {len(self)}"
-                ) from e
-            raise
+        if is_bool_dtype(key.dtype):
+            return libcudf.copying.boolean_mask_scatter([value], [self], key)[
+                0
+            ]._with_type_metadata(self.dtype)
+        else:
+            return libcudf.copying.scatter([value], key, [self])[
+                0
+            ]._with_type_metadata(self.dtype)
 
     def _check_scatter_key_length(
         self, num_keys: int, value: Union[cudf.core.scalar.Scalar, ColumnBase]

From 536ddd08fd2e86940c55dab178f4a1a1e9361539 Mon Sep 17 00:00:00 2001
From: Robert Maynard <rmaynard@nvidia.com>
Date: Thu, 20 Oct 2022 12:47:32 -0400
Subject: [PATCH 052/202] Tell jitify_preprocess where to search for libnvrtc
 (#11787)

On machines with multiple CUDA Toolkits installed it is possible to have a mismatch between the version of `nvcc` used to compile code and the version of `libnvrtc` used for the JIT code. This generally occurs when `LD_LIBRARY_PATH` points to a different version of the CUDA Toolkit.

We now explicitly specify which toolkit library directory to search when preprocessing JIT code during libcudf compilation.

Authors:
  - Robert Maynard (https://github.com/robertmaynard)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/11787
---
 cpp/cmake/Modules/JitifyPreprocessKernels.cmake | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake
index 8ce98c6d582..0d52a064761 100644
--- a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake
+++ b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake
@@ -39,7 +39,8 @@ function(jit_preprocess_files)
       VERBATIM
       COMMAND ${CMAKE_COMMAND} -E make_directory "${jit_output_directory}"
       COMMAND
-        jitify_preprocess ${ARG_FILE} -o
+        "${CMAKE_COMMAND}" -E env LD_LIBRARY_PATH="${CUDAToolkit_LIBRARY_DIR}"
+        $<TARGET_FILE:jitify_preprocess> ${ARG_FILE} -o
         ${CUDF_GENERATED_INCLUDE_DIR}/include/jit_preprocessed_files -i -m -std=c++17
         -remove-unused-globals -D_FILE_OFFSET_BITS=64 -D__CUDACC_RTC__ -I${CUDF_SOURCE_DIR}/include
         -I${CUDF_SOURCE_DIR}/src ${libcudacxx_includes} -I${CUDAToolkit_INCLUDE_DIRS}

From 98185fef48070ed916a52b86bd330cfcb83d827b Mon Sep 17 00:00:00 2001
From: Ed Seidl <etseidl@users.noreply.github.com>
Date: Thu, 20 Oct 2022 12:18:56 -0700
Subject: [PATCH 053/202] Fix writing of Parquet files with many fragments
 (#11869)

This PR fixes an error that can occur when very small page sizes are used when writing Parquet files. #11551 changed from fixed 5000 row page fragments to a scaled value based on the requested max page size. For small page sizes, the number of fragments to process can exceed 64k. The number of fragments is used as the `y` dimension when calling `gpuInitPageFragments`, and when it exceeds 64k the kernel fails to launch, ultimately leading to an invalid memory access.

Authors:
  - Ed Seidl (https://github.com/etseidl)

Approvers:
  - Vukasin Milovanovic (https://github.com/vuule)
  - Bradley Dice (https://github.com/bdice)
  - Karthikeyan (https://github.com/karthikeyann)

URL: https://github.com/rapidsai/cudf/pull/11869
---
 cpp/src/io/parquet/page_enc.cu | 184 ++++++++++++++++++---------------
 cpp/tests/io/parquet_test.cpp  |  18 ++++
 2 files changed, 117 insertions(+), 85 deletions(-)

diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu
index cdee066a06a..7c5651b1ef8 100644
--- a/cpp/src/io/parquet/page_enc.cu
+++ b/cpp/src/io/parquet/page_enc.cu
@@ -61,6 +61,12 @@ constexpr int32_t NO_TRUNC_STATS = 0;
 // minimum scratch space required for encoding statistics
 constexpr size_t MIN_STATS_SCRATCH_SIZE = sizeof(__int128_t);
 
+// mask to determine lane id
+constexpr uint32_t WARP_MASK = cudf::detail::warp_size - 1;
+
+// currently 64k - 1
+constexpr uint32_t MAX_GRID_Y_SIZE = (1 << 16) - 1;
+
 struct frag_init_state_s {
   parquet_column_device_view col;
   PageFragment frag;
@@ -116,82 +122,87 @@ __global__ void __launch_bounds__(block_size)
   using block_reduce = cub::BlockReduce<uint32_t, block_size>;
   __shared__ typename block_reduce::TempStorage reduce_storage;
 
-  frag_init_state_s* const s = &state_g;
-  uint32_t t                 = threadIdx.x;
-  int frag_y                 = blockIdx.y;
-  auto const physical_type   = col_desc[blockIdx.x].physical_type;
+  frag_init_state_s* const s              = &state_g;
+  uint32_t const t                        = threadIdx.x;
+  auto const physical_type                = col_desc[blockIdx.x].physical_type;
+  uint32_t const num_fragments_per_column = frag.size().second;
 
-  if (t == 0) s->col = col_desc[blockIdx.x];
+  if (t == 0) { s->col = col_desc[blockIdx.x]; }
   __syncthreads();
-  if (!t) {
-    // Find which partition this fragment came from
-    auto it =
-      thrust::upper_bound(thrust::seq, part_frag_offset.begin(), part_frag_offset.end(), frag_y);
-    int p             = it - part_frag_offset.begin() - 1;
-    int part_end_row  = partitions[p].start_row + partitions[p].num_rows;
-    s->frag.start_row = (frag_y - part_frag_offset[p]) * fragment_size + partitions[p].start_row;
-
-    // frag.num_rows = fragment_size except for the last fragment in partition which can be smaller.
-    // num_rows is fixed but fragment size could be larger if the data is strings or nested.
-    s->frag.num_rows           = min(fragment_size, part_end_row - s->frag.start_row);
-    s->frag.num_dict_vals      = 0;
-    s->frag.fragment_data_size = 0;
-    s->frag.dict_data_size     = 0;
-
-    s->frag.start_value_idx = row_to_value_idx(s->frag.start_row, s->col);
-    size_type end_value_idx = row_to_value_idx(s->frag.start_row + s->frag.num_rows, s->col);
-    s->frag.num_leaf_values = end_value_idx - s->frag.start_value_idx;
-
-    if (s->col.level_offsets != nullptr) {
-      // For nested schemas, the number of values in a fragment is not directly related to the
-      // number of encoded data elements or the number of rows.  It is simply the number of
-      // repetition/definition values which together encode validity and nesting information.
-      size_type first_level_val_idx = s->col.level_offsets[s->frag.start_row];
-      size_type last_level_val_idx  = s->col.level_offsets[s->frag.start_row + s->frag.num_rows];
-      s->frag.num_values            = last_level_val_idx - first_level_val_idx;
-    } else {
-      s->frag.num_values = s->frag.num_rows;
-    }
-  }
+
   auto const leaf_type = s->col.leaf_column->type().id();
   auto const dtype_len = physical_type_len(physical_type, leaf_type);
-  __syncthreads();
 
-  size_type nvals           = s->frag.num_leaf_values;
-  size_type start_value_idx = s->frag.start_value_idx;
-
-  for (uint32_t i = 0; i < nvals; i += block_size) {
-    uint32_t val_idx  = start_value_idx + i + t;
-    uint32_t is_valid = (i + t < nvals && val_idx < s->col.leaf_column->size())
-                          ? s->col.leaf_column->is_valid(val_idx)
-                          : 0;
-    uint32_t len;
-    if (is_valid) {
-      len = dtype_len;
-      if (physical_type == BYTE_ARRAY) {
-        switch (leaf_type) {
-          case type_id::STRING: {
-            auto str = s->col.leaf_column->element<string_view>(val_idx);
-            len += str.size_bytes();
-          } break;
-          case type_id::LIST: {
-            auto list_element =
-              get_element<statistics::byte_array_view>(*s->col.leaf_column, val_idx);
-            len += list_element.size_bytes();
-          } break;
-          default: CUDF_UNREACHABLE("Unsupported data type for leaf column");
-        }
+  for (uint32_t frag_y = blockIdx.y; frag_y < num_fragments_per_column; frag_y += gridDim.y) {
+    if (t == 0) {
+      // Find which partition this fragment came from
+      auto it =
+        thrust::upper_bound(thrust::seq, part_frag_offset.begin(), part_frag_offset.end(), frag_y);
+      int p             = it - part_frag_offset.begin() - 1;
+      int part_end_row  = partitions[p].start_row + partitions[p].num_rows;
+      s->frag.start_row = (frag_y - part_frag_offset[p]) * fragment_size + partitions[p].start_row;
+
+      // frag.num_rows = fragment_size except for the last fragment in partition which can be
+      // smaller. num_rows is fixed but fragment size could be larger if the data is strings or
+      // nested.
+      s->frag.num_rows           = min(fragment_size, part_end_row - s->frag.start_row);
+      s->frag.num_dict_vals      = 0;
+      s->frag.fragment_data_size = 0;
+      s->frag.dict_data_size     = 0;
+
+      s->frag.start_value_idx = row_to_value_idx(s->frag.start_row, s->col);
+      size_type end_value_idx = row_to_value_idx(s->frag.start_row + s->frag.num_rows, s->col);
+      s->frag.num_leaf_values = end_value_idx - s->frag.start_value_idx;
+
+      if (s->col.level_offsets != nullptr) {
+        // For nested schemas, the number of values in a fragment is not directly related to the
+        // number of encoded data elements or the number of rows.  It is simply the number of
+        // repetition/definition values which together encode validity and nesting information.
+        size_type first_level_val_idx = s->col.level_offsets[s->frag.start_row];
+        size_type last_level_val_idx  = s->col.level_offsets[s->frag.start_row + s->frag.num_rows];
+        s->frag.num_values            = last_level_val_idx - first_level_val_idx;
+      } else {
+        s->frag.num_values = s->frag.num_rows;
       }
-    } else {
-      len = 0;
     }
+    __syncthreads();
 
-    len = block_reduce(reduce_storage).Sum(len);
-    if (!t) { s->frag.fragment_data_size += len; }
+    size_type nvals           = s->frag.num_leaf_values;
+    size_type start_value_idx = s->frag.start_value_idx;
+
+    for (uint32_t i = 0; i < nvals; i += block_size) {
+      uint32_t val_idx  = start_value_idx + i + t;
+      uint32_t is_valid = (i + t < nvals && val_idx < s->col.leaf_column->size())
+                            ? s->col.leaf_column->is_valid(val_idx)
+                            : 0;
+      uint32_t len;
+      if (is_valid) {
+        len = dtype_len;
+        if (physical_type == BYTE_ARRAY) {
+          switch (leaf_type) {
+            case type_id::STRING: {
+              auto str = s->col.leaf_column->element<string_view>(val_idx);
+              len += str.size_bytes();
+            } break;
+            case type_id::LIST: {
+              auto list_element =
+                get_element<statistics::byte_array_view>(*s->col.leaf_column, val_idx);
+              len += list_element.size_bytes();
+            } break;
+            default: CUDF_UNREACHABLE("Unsupported data type for leaf column");
+          }
+        }
+      } else {
+        len = 0;
+      }
+
+      len = block_reduce(reduce_storage).Sum(len);
+      if (t == 0) { s->frag.fragment_data_size += len; }
+      __syncthreads();
+    }
     __syncthreads();
+    if (t == 0) { frag[blockIdx.x][frag_y] = s->frag; }
   }
-  __syncthreads();
-  if (t == 0) frag[blockIdx.x][blockIdx.y] = s->frag;
 }
 
 // blockDim {128,1,1}
@@ -200,21 +211,21 @@ __global__ void __launch_bounds__(128)
                        device_2dspan<PageFragment const> fragments,
                        device_span<parquet_column_device_view const> col_desc)
 {
-  // TODO: why not 1 block per warp?
-  __shared__ __align__(8) statistics_group group_g[4];
-
-  uint32_t lane_id              = threadIdx.x & 0x1f;
-  uint32_t frag_id              = blockIdx.y * 4 + (threadIdx.x >> 5);
-  uint32_t column_id            = blockIdx.x;
-  auto num_fragments_per_column = fragments.size().second;
-  statistics_group* const g     = &group_g[threadIdx.x >> 5];
-  if (!lane_id && frag_id < num_fragments_per_column) {
-    g->col       = &col_desc[column_id];
-    g->start_row = fragments[column_id][frag_id].start_value_idx;
-    g->num_rows  = fragments[column_id][frag_id].num_leaf_values;
+  uint32_t const lane_id                  = threadIdx.x & WARP_MASK;
+  uint32_t const column_id                = blockIdx.x;
+  uint32_t const num_fragments_per_column = fragments.size().second;
+
+  uint32_t frag_id = blockIdx.y * 4 + (threadIdx.x / cudf::detail::warp_size);
+  while (frag_id < num_fragments_per_column) {
+    if (lane_id == 0) {
+      statistics_group g;
+      g.col                      = &col_desc[column_id];
+      g.start_row                = fragments[column_id][frag_id].start_value_idx;
+      g.num_rows                 = fragments[column_id][frag_id].num_leaf_values;
+      groups[column_id][frag_id] = g;
+    }
+    frag_id += gridDim.y * 4;
   }
-  __syncthreads();
-  if (frag_id < num_fragments_per_column and lane_id == 0) groups[column_id][frag_id] = *g;
 }
 
 // blockDim {128,1,1}
@@ -2017,9 +2028,10 @@ void InitPageFragments(device_2dspan<PageFragment> frag,
                        uint32_t fragment_size,
                        rmm::cuda_stream_view stream)
 {
-  auto num_columns              = frag.size().first;
-  auto num_fragments_per_column = frag.size().second;
-  dim3 dim_grid(num_columns, num_fragments_per_column);  // 1 threadblock per fragment
+  auto const num_columns              = frag.size().first;
+  auto const num_fragments_per_column = frag.size().second;
+  auto const grid_y = std::min(static_cast<uint32_t>(num_fragments_per_column), MAX_GRID_Y_SIZE);
+  dim3 const dim_grid(num_columns, grid_y);  // 1 threadblock per fragment
   gpuInitPageFragments<512><<<dim_grid, 512, 0, stream.value()>>>(
     frag, col_desc, partitions, part_frag_offset, fragment_size);
 }
@@ -2031,8 +2043,10 @@ void InitFragmentStatistics(device_2dspan<statistics_group> groups,
 {
   int const num_columns              = col_desc.size();
   int const num_fragments_per_column = fragments.size().second;
-  auto grid_y = util::div_rounding_up_safe(num_fragments_per_column, 128 / cudf::detail::warp_size);
-  dim3 dim_grid(num_columns, grid_y);  // 1 warp per fragment
+  auto const y_dim =
+    util::div_rounding_up_safe(num_fragments_per_column, 128 / cudf::detail::warp_size);
+  auto const grid_y = std::min(static_cast<uint32_t>(y_dim), MAX_GRID_Y_SIZE);
+  dim3 const dim_grid(num_columns, grid_y);  // 1 warp per fragment
   gpuInitFragmentStats<<<dim_grid, 128, 0, stream.value()>>>(groups, fragments, col_desc);
 }
 
diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp
index b13e875eabd..9bb2aa207e4 100644
--- a/cpp/tests/io/parquet_test.cpp
+++ b/cpp/tests/io/parquet_test.cpp
@@ -1048,6 +1048,24 @@ TEST_F(ParquetWriterTest, HostBuffer)
   cudf::test::expect_metadata_equal(expected_metadata, result.metadata);
 }
 
+TEST_F(ParquetWriterTest, ManyFragments)  // write/read round trip with a small page size limit
+{
+  srand(31337);  // fixed seed; presumably keeps the random table reproducible - TODO confirm
+  auto const expected = create_random_fixed_table<int>(10, 6'000'000, false);  // args presumably (columns, rows, with-validity) - TODO confirm
+
+  auto const filepath = temp_env->get_temp_filepath("ManyFragments.parquet");
+  cudf::io::parquet_writer_options const args =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *expected)
+      .max_page_size_bytes(8 * 1024);  // 8 KiB limit; presumably chosen to force many fragments, per the test name
+  cudf::io::write_parquet(args);
+
+  cudf::io::parquet_reader_options const read_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
+  auto const result = cudf::io::read_parquet(read_opts);
+
+  CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *expected);  // the table read back must match the table written
+}
+
 TEST_F(ParquetWriterTest, NonNullable)
 {
   srand(31337);

From ee9ffd04acf9ab05af2a510dda50d73574c63569 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 20 Oct 2022 17:21:20 -0500
Subject: [PATCH 054/202] Default to equal NaNs in
 make_collect_set_aggregation. (#11621)

Partially resolves #11329. This helps to align our default behaviors for null and NaN equality across APIs, specifically for `make_collect_set_aggregation` in this PR. All functions should default to treating null values as equal to one another and NaN values as equal to one another.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/11621
---
 cpp/include/cudf/aggregation.hpp           |  7 ++---
 cpp/tests/groupby/collect_set_tests.cpp    | 15 ++++++++---
 cpp/tests/reductions/collect_ops_tests.cpp | 12 ++++++---
 cpp/tests/rolling/collect_ops_test.cpp     | 30 ++++++++++++++--------
 4 files changed, 43 insertions(+), 21 deletions(-)

diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp
index a26a0c7947b..a92da0b0347 100644
--- a/cpp/include/cudf/aggregation.hpp
+++ b/cpp/include/cudf/aggregation.hpp
@@ -515,9 +515,10 @@ std::unique_ptr<Base> make_collect_list_aggregation(
  * @return A COLLECT_SET aggregation object
  */
 template <typename Base = aggregation>
-std::unique_ptr<Base> make_collect_set_aggregation(null_policy null_handling = null_policy::INCLUDE,
-                                                   null_equality nulls_equal = null_equality::EQUAL,
-                                                   nan_equality nans_equal = nan_equality::UNEQUAL);
+std::unique_ptr<Base> make_collect_set_aggregation(
+  null_policy null_handling = null_policy::INCLUDE,
+  null_equality nulls_equal = null_equality::EQUAL,
+  nan_equality nans_equal   = nan_equality::ALL_EQUAL);
 
 /**
  * @brief Factory to create a LAG aggregation
diff --git a/cpp/tests/groupby/collect_set_tests.cpp b/cpp/tests/groupby/collect_set_tests.cpp
index cf324cf3a8e..818a4c63a1f 100644
--- a/cpp/tests/groupby/collect_set_tests.cpp
+++ b/cpp/tests/groupby/collect_set_tests.cpp
@@ -248,7 +248,10 @@ TEST_F(CollectSetTest, FloatsWithNaN)
     vals_expected = {{{-2.3e-5f, 1.0f, 2.3e5f, -NAN, -NAN, NAN, NAN, 0.0f},
                       validity_col{true, true, true, true, true, true, true, false}}};
     auto const [out_keys, out_lists] =
-      groupby_collect_set(keys, vals, CollectSetTest::collect_set());
+      groupby_collect_set(keys,
+                          vals,
+                          cudf::make_collect_set_aggregation<cudf::groupby_aggregation>(
+                            null_policy::INCLUDE, null_equality::EQUAL, nan_equality::UNEQUAL));
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(keys_expected, *out_keys, verbosity);
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(vals_expected, *out_lists, verbosity);
   }
@@ -258,7 +261,10 @@ TEST_F(CollectSetTest, FloatsWithNaN)
     vals_expected = {{{-2.3e-5f, 1.0f, 2.3e5f, -NAN, -NAN, NAN, NAN, 0.0f, 0.0f},
                       validity_col{true, true, true, true, true, true, true, false, false}}};
     auto const [out_keys, out_lists] =
-      groupby_collect_set(keys, vals, CollectSetTest::collect_set_null_unequal());
+      groupby_collect_set(keys,
+                          vals,
+                          cudf::make_collect_set_aggregation<cudf::groupby_aggregation>(
+                            null_policy::INCLUDE, null_equality::UNEQUAL, nan_equality::UNEQUAL));
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(keys_expected, *out_keys, verbosity);
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(vals_expected, *out_lists, verbosity);
   }
@@ -267,7 +273,10 @@ TEST_F(CollectSetTest, FloatsWithNaN)
   {
     vals_expected = {{-2.3e-5f, 1.0f, 2.3e5f, -NAN, -NAN, NAN, NAN}};
     auto const [out_keys, out_lists] =
-      groupby_collect_set(keys, vals, CollectSetTest::collect_set_null_exclude());
+      groupby_collect_set(keys,
+                          vals,
+                          cudf::make_collect_set_aggregation<cudf::groupby_aggregation>(
+                            null_policy::EXCLUDE, null_equality::EQUAL, nan_equality::UNEQUAL));
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(keys_expected, *out_keys, verbosity);
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(vals_expected, *out_lists, verbosity);
   }
diff --git a/cpp/tests/reductions/collect_ops_tests.cpp b/cpp/tests/reductions/collect_ops_tests.cpp
index a0fdab5e994..842aaa3ab07 100644
--- a/cpp/tests/reductions/collect_ops_tests.cpp
+++ b/cpp/tests/reductions/collect_ops_tests.cpp
@@ -196,15 +196,19 @@ TEST_F(CollectTest, CollectSetWithNaN)
   // nan unequal with null equal
   fp_wrapper expected1{{-2.3e-5f, 1.0f, 2.3e5f, -NAN, -NAN, NAN, NAN, 0.0f},
                        {1, 1, 1, 1, 1, 1, 1, 0}};
-  auto const ret1 = collect_set(col, make_collect_set_aggregation<reduce_aggregation>());
+  auto const ret1 =
+    collect_set(col,
+                make_collect_set_aggregation<reduce_aggregation>(
+                  null_policy::INCLUDE, null_equality::EQUAL, nan_equality::UNEQUAL));
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, dynamic_cast<list_scalar*>(ret1.get())->view());
 
   // nan unequal with null unequal
   fp_wrapper expected2{{-2.3e-5f, 1.0f, 2.3e5f, -NAN, -NAN, NAN, NAN, 0.0f, 0.0f},
                        {1, 1, 1, 1, 1, 1, 1, 0, 0}};
-  auto const ret2 = collect_set(
-    col,
-    make_collect_set_aggregation<reduce_aggregation>(null_policy::INCLUDE, null_equality::UNEQUAL));
+  auto const ret2 =
+    collect_set(col,
+                make_collect_set_aggregation<reduce_aggregation>(
+                  null_policy::INCLUDE, null_equality::UNEQUAL, nan_equality::UNEQUAL));
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, dynamic_cast<list_scalar*>(ret2.get())->view());
 
   // nan equal with null equal
diff --git a/cpp/tests/rolling/collect_ops_test.cpp b/cpp/tests/rolling/collect_ops_test.cpp
index 9dc13b2f9f7..8a396d793a3 100644
--- a/cpp/tests/rolling/collect_ops_test.cpp
+++ b/cpp/tests/rolling/collect_ops_test.cpp
@@ -2118,13 +2118,14 @@ TEST_F(CollectSetTest, FloatGroupedRollingWindowWithNaNs)
   auto const following   = 1;
   auto const min_periods = 1;
   // test on nan_equality::UNEQUAL
-  auto const result =
-    grouped_rolling_collect_set(table_view{std::vector<column_view>{group_column}},
-                                input_column,
-                                preceding,
-                                following,
-                                min_periods,
-                                *make_collect_set_aggregation<rolling_aggregation>());
+  auto const result = grouped_rolling_collect_set(
+    table_view{std::vector<column_view>{group_column}},
+    input_column,
+    preceding,
+    following,
+    min_periods,
+    *make_collect_set_aggregation<rolling_aggregation>(
+      null_policy::INCLUDE, null_equality::EQUAL, nan_equality::UNEQUAL));
 
   auto const expected_result = lists_column_wrapper<double>{
     {{0.2341, 1.23}, std::initializer_list<bool>{true, true}},
@@ -2186,7 +2187,8 @@ TEST_F(CollectSetTest, BasicRollingWindowWithNaNs)
                         prev_column,
                         foll_column,
                         1,
-                        *make_collect_set_aggregation<rolling_aggregation>());
+                        *make_collect_set_aggregation<rolling_aggregation>(
+                          null_policy::INCLUDE, null_equality::EQUAL, nan_equality::UNEQUAL));
 
   auto const expected_result =
     lists_column_wrapper<double>{
@@ -2200,8 +2202,13 @@ TEST_F(CollectSetTest, BasicRollingWindowWithNaNs)
 
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result->view(), result_column_based_window->view());
 
-  auto const result_fixed_window = rolling_collect_set(
-    input_column, 2, 1, 1, *make_collect_set_aggregation<rolling_aggregation>());
+  auto const result_fixed_window =
+    rolling_collect_set(input_column,
+                        2,
+                        1,
+                        1,
+                        *make_collect_set_aggregation<rolling_aggregation>(
+                          null_policy::INCLUDE, null_equality::EQUAL, nan_equality::UNEQUAL));
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result->view(), result_fixed_window->view());
 
   auto const result_with_nulls_excluded =
@@ -2209,7 +2216,8 @@ TEST_F(CollectSetTest, BasicRollingWindowWithNaNs)
                         2,
                         1,
                         1,
-                        *make_collect_set_aggregation<rolling_aggregation>(null_policy::EXCLUDE));
+                        *make_collect_set_aggregation<rolling_aggregation>(
+                          null_policy::EXCLUDE, null_equality::EQUAL, nan_equality::UNEQUAL));
 
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result->view(), result_with_nulls_excluded->view());
 

From 5803015be119d7a52b11500489477592fbfb7177 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 20 Oct 2022 17:25:07 -0500
Subject: [PATCH 055/202] Rename libcudf++ to libcudf. (#11953)

For consistency across our documentation, this PR renames `libcudf++` to `libcudf`.

Authors:
  - Bradley Dice (https://github.com/bdice)
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/11953
---
 cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md | 4 ++--
 cpp/doxygen/developer_guide/DOCUMENTATION.md   | 2 +-
 python/cudf/cudf/_lib/avro.pyx                 | 4 ++--
 python/cudf/cudf/_lib/io/utils.pyx             | 6 +++---
 python/cudf/cudf/_lib/json.pyx                 | 2 +-
 python/cudf/cudf/_lib/reduce.pyx               | 6 +++---
 python/cudf/cudf/core/frame.py                 | 2 +-
 7 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md
index 52c443cd764..606dabcb937 100644
--- a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md
+++ b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md
@@ -176,7 +176,7 @@ Resource ownership is an essential concept in libcudf. In short, an "owning" obj
 resource (such as device memory). It acquires that resource during construction and releases the
 resource in destruction ([RAII](https://en.cppreference.com/w/cpp/language/raii)). A "non-owning"
 object does not own resources. Any class in libcudf with the `*_view` suffix is non-owning. For more
-detail see the [`libcudf++` presentation.](https://docs.google.com/presentation/d/1zKzAtc1AWFKfMhiUlV5yRZxSiPLwsObxMlWRWz_f5hA/edit?usp=sharing)
+detail see the [`libcudf` presentation.](https://docs.google.com/presentation/d/1zKzAtc1AWFKfMhiUlV5yRZxSiPLwsObxMlWRWz_f5hA/edit?usp=sharing)
 
 libcudf functions typically take views as input (`column_view` or `table_view`)
 and produce `unique_ptr`s to owning objects as output. For example,
@@ -403,7 +403,7 @@ Functions like merge or groupby in libcudf make no guarantees about the order of
 Promising deterministic ordering is not, in general, conducive to fast parallel algorithms.
 Calling code is responsible for performing sorts after the fact if sorted outputs are needed.
 
-# libcudf++ API and Implementation
+# libcudf API and Implementation
 
 ## Streams
 
diff --git a/cpp/doxygen/developer_guide/DOCUMENTATION.md b/cpp/doxygen/developer_guide/DOCUMENTATION.md
index 8a7d89c8dbd..07ef1bdc530 100644
--- a/cpp/doxygen/developer_guide/DOCUMENTATION.md
+++ b/cpp/doxygen/developer_guide/DOCUMENTATION.md
@@ -1,4 +1,4 @@
-# libcudf++ C++ Documentation Guide
+# libcudf C++ Documentation Guide
 
 These guidelines apply to documenting all libcudf C++ source files using doxygen style formatting although only public APIs and classes are actually [published](https://docs.rapids.ai/api/libcudf/stable/index.html).
 
diff --git a/python/cudf/cudf/_lib/avro.pyx b/python/cudf/cudf/_lib/avro.pyx
index b6e23e7c3a0..0c8886ca356 100644
--- a/python/cudf/cudf/_lib/avro.pyx
+++ b/python/cudf/cudf/_lib/avro.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libcpp.string cimport string
 from libcpp.utility cimport move
@@ -16,7 +16,7 @@ from cudf._lib.utils cimport data_from_unique_ptr
 
 cpdef read_avro(datasource, columns=None, skip_rows=-1, num_rows=-1):
     """
-    Cython function to call libcudf++ read_avro, see `read_avro`.
+    Cython function to call libcudf read_avro, see `read_avro`.
 
     See Also
     --------
diff --git a/python/cudf/cudf/_lib/io/utils.pyx b/python/cudf/cudf/_lib/io/utils.pyx
index 18b26bb5aa6..21939ff39b6 100644
--- a/python/cudf/cudf/_lib/io/utils.pyx
+++ b/python/cudf/cudf/_lib/io/utils.pyx
@@ -30,7 +30,7 @@ import cudf
 from cudf.api.types import is_struct_dtype
 
 
-# Converts the Python source input to libcudf++ IO source_info
+# Converts the Python source input to libcudf IO source_info
 # with the appropriate type and source values
 cdef source_info make_source_info(list src) except*:
     if not src:
@@ -80,7 +80,7 @@ cdef source_info make_source_info(list src) except*:
 
     return source_info(c_host_buffers)
 
-# Converts the Python sink input to libcudf++ IO sink_info.
+# Converts the Python sink input to libcudf IO sink_info.
 cdef sink_info make_sinks_info(
     list src, vector[unique_ptr[data_sink]] & sink
 ) except*:
@@ -129,7 +129,7 @@ cdef sink_info make_sink_info(src, unique_ptr[data_sink] & sink) except*:
     return info
 
 
-# Adapts a python io.IOBase object as a libcudf++ IO data_sink. This lets you
+# Adapts a python io.IOBase object as a libcudf IO data_sink. This lets you
 # write from cudf to any python file-like object (File/BytesIO/SocketIO etc)
 cdef cppclass iobase_data_sink(data_sink):
     object buf
diff --git a/python/cudf/cudf/_lib/json.pyx b/python/cudf/cudf/_lib/json.pyx
index b0aafc275d6..5efe40ed2e9 100644
--- a/python/cudf/cudf/_lib/json.pyx
+++ b/python/cudf/cudf/_lib/json.pyx
@@ -46,7 +46,7 @@ cpdef read_json(object filepaths_or_buffers,
 
     # If input data is a JSON string (or StringIO), hold a reference to
     # the encoded memoryview externally to ensure the encoded buffer
-    # isn't destroyed before calling libcudf++ `read_json()`
+    # isn't destroyed before calling libcudf `read_json()`
     for idx in range(len(filepaths_or_buffers)):
         if isinstance(filepaths_or_buffers[idx], io.StringIO):
             filepaths_or_buffers[idx] = \
diff --git a/python/cudf/cudf/_lib/reduce.pyx b/python/cudf/cudf/_lib/reduce.pyx
index bdbe7e1c668..c1494df9cac 100644
--- a/python/cudf/cudf/_lib/reduce.pyx
+++ b/python/cudf/cudf/_lib/reduce.pyx
@@ -35,7 +35,7 @@ cimport cudf._lib.cpp.types as libcudf_types
 
 def reduce(reduction_op, Column incol, dtype=None, **kwargs):
     """
-    Top level Cython reduce function wrapping libcudf++ reductions.
+    Top level Cython reduce function wrapping libcudf reductions.
 
     Parameters
     ----------
@@ -91,7 +91,7 @@ def reduce(reduction_op, Column incol, dtype=None, **kwargs):
 
 def scan(scan_op, Column incol, inclusive, **kwargs):
     """
-    Top level Cython scan function wrapping libcudf++ scans.
+    Top level Cython scan function wrapping libcudf scans.
 
     Parameters
     ----------
@@ -122,7 +122,7 @@ def scan(scan_op, Column incol, inclusive, **kwargs):
 
 def minmax(Column incol):
     """
-    Top level Cython minmax function wrapping libcudf++ minmax.
+    Top level Cython minmax function wrapping libcudf minmax.
 
     Parameters
     ----------
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 4fb914a6409..9e539ee157b 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1412,7 +1412,7 @@ def searchsorted(
         >>> df.searchsorted(values_df, ascending=False)
         array([4, 4, 4, 0], dtype=int32)
         """
-        # Call libcudf++ search_sorted primitive
+        # Call libcudf search_sorted primitive
 
         if na_position not in {"first", "last"}:
             raise ValueError(f"invalid na_position: {na_position}")

From b9ba9e3e47e66bdc716c01c10606580ca92fa587 Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Fri, 21 Oct 2022 10:12:01 -0400
Subject: [PATCH 056/202] Update Unit Testing in libcudf guidelines to code
 tests outside the cudf::test namespace (#11959)

Update text to include coding tests outside the `cudf` or the `cudf::test` namespace.
Realized our test guidelines needed to be updated while working on #11734.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/11959
---
 cpp/doxygen/developer_guide/TESTING.md | 150 +++++++++++++------------
 1 file changed, 78 insertions(+), 72 deletions(-)

diff --git a/cpp/doxygen/developer_guide/TESTING.md b/cpp/doxygen/developer_guide/TESTING.md
index 31747e31ccb..198590bb35c 100644
--- a/cpp/doxygen/developer_guide/TESTING.md
+++ b/cpp/doxygen/developer_guide/TESTING.md
@@ -6,6 +6,13 @@ Unit tests in libcudf are written using
 **Important:** Instead of including `gtest/gtest.h` directly, use
 `#include <cudf_test/cudf_gtest.hpp>`.
 
+Also, write test code in the global namespace. That is,
+do not write test code in the `cudf` or the `cudf::test` namespace or their
+sub-namespaces.
+Likewise, do not use `using namespace cudf;` or `using namespace cudf::test;`
+in the global namespace.
+
+
 ## Best Practices: What Should We Test?
 
 In general we should test to make sure all code paths are covered. This is not always easy or
@@ -38,8 +45,8 @@ groupby).  Here are some other guidelines.
    does happen); columns with zero size but that somehow have non-null data pointers; and struct
    columns with no children.
 
- * Decimal types are not included in the `NumericTypes` type list, but are included in
-   `FixedWidthTypes`, so be careful that tests either include or exclude decimal types as
+ * Decimal types are not included in the `cudf::test::NumericTypes` type list, but are included in
+   `cudf::test::FixedWidthTypes`, so be careful that tests either include or exclude decimal types as
    appropriate.
 
 
@@ -99,8 +106,8 @@ list defined in `TestTypes` (`int, float, double`).
 
 The list of types that are used in tests should be consistent across all tests. To ensure
 consistency, several sets of common type lists are provided in
-`include/cudf_test/type_lists.hpp`. For example, `NumericTypes` is a type list of all numeric types,
-`FixedWidthTypes` is a list of all fixed-width element types, and `AllTypes` is a list of every
+`include/cudf_test/type_lists.hpp`. For example, `cudf::test::NumericTypes` is a type list of all numeric types,
+`cudf::test::FixedWidthTypes` is a list of all fixed-width element types, and `cudf::test::AllTypes` is a list of every
 element type that libcudf supports.
 
 ```c++
@@ -126,9 +133,8 @@ the `N`th type within the nested list, use `GetType<NestedList, N>`.
 Imagine testing all possible two-type combinations of `<int,float>`. This could be done manually:
 
 ```c++
-using namespace cudf::test;
 template <typename TwoTypes>
-TwoTypesFixture : BaseFixture{...};
+struct TwoTypesFixture : cudf::test::BaseFixture{...};
 using TwoTypesList = Types< Types<int, int>, Types<int, float>,
                             Types<float, int>, Types<float, float> >;
 TYPED_TEST_SUITE(TwoTypesFixture, TwoTypesList);
@@ -178,9 +184,9 @@ transparently passed to any API expecting a `column_view` or `mutable_column_vie
 
 #### fixed_width_column_wrapper
 
-The `fixed_width_column_wrapper` class should be used for constructing and initializing columns of
+The `cudf::test::fixed_width_column_wrapper` class should be used for constructing and initializing columns of
 any fixed-width element type, e.g., numeric types, timestamp types, Boolean, etc.
-`fixed_width_column_wrapper` provides constructors that accept an iterator range to generate each
+`cudf::test::fixed_width_column_wrapper` provides constructors that accept an iterator range to generate each
 element in the column. For nullable columns, an additional iterator can be provided to indicate the
 validity of each element. There are also constructors that accept a `std::initializer_list<T>` for
 the column elements and optionally for the validity of each element.
@@ -189,25 +195,25 @@ Example:
 
 ```c++
 // Creates a non-nullable column of INT32 elements with 5 elements: {0, 1, 2, 3, 4}
-auto elements = make_counting_transform_iterator(0, [](auto i){return i;});
-fixed_width_column_wrapper<int32_t> w(elements, elements + 5);
+auto elements = cudf::detail::make_counting_transform_iterator(0, [](auto i){return i;});
+cudf::test::fixed_width_column_wrapper<int32_t> w(elements, elements + 5);
 
 // Creates a nullable column of INT32 elements with 5 elements: {null, 1, null, 3, null}
-auto elements = make_counting_transform_iterator(0, [](auto i){return i;});
-auto validity = make_counting_transform_iterator(0, [](auto i){return i % 2;})
-fixed_width_column_wrapper<int32_t> w(elements, elements + 5, validity);
+auto elements = cudf::detail::make_counting_transform_iterator(0, [](auto i){return i;});
+auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i){return i % 2;});
+cudf::test::fixed_width_column_wrapper<int32_t> w(elements, elements + 5, validity);
 
 // Creates a non-nullable INT32 column with 4 elements: {1, 2, 3, 4}
-fixed_width_column_wrapper<int32_t> w{{1, 2, 3, 4}};
+cudf::test::fixed_width_column_wrapper<int32_t> w{{1, 2, 3, 4}};
 
 // Creates a nullable INT32 column with 4 elements: {1, NULL, 3, NULL}
-fixed_width_column_wrapper<int32_t> w{ {1,2,3,4}, {1, 0, 1, 0}};
+cudf::test::fixed_width_column_wrapper<int32_t> w{ {1,2,3,4}, {1, 0, 1, 0}};
 ```
 
 #### fixed_point_column_wrapper
 
-The `fixed_point_column_wrapper` class should be used for constructing and initializing columns of
-any fixed-point element type (DECIMAL32 or DECIMAL64). `fixed_point_column_wrapper` provides
+The `cudf::test::fixed_point_column_wrapper` class should be used for constructing and initializing columns of
+any fixed-point element type (DECIMAL32 or DECIMAL64). `cudf::test::fixed_point_column_wrapper` provides
 constructors that accept an iterator range to generate each element in the column. For nullable
 columns, an additional iterator can be provided to indicate the validity of each element.
 Constructors also take the scale of the fixed-point values to create.
@@ -215,20 +221,20 @@ Constructors also take the scale of the fixed-point values to create.
 Example:
 
 ```c++
-    // Creates a non-nullable column of 4 DECIMAL32 elements of scale 3: {1000, 2000, 3000, 4000}
-    auto elements = make_counting_transform_iterator(0, [](auto i){ return i; });
-    fixed_point_column_wrapper<int32_t> w(elements, elements + 4, 3);
-
-    // Creates a nullable column of 5 DECIMAL32 elements of scale 2: {null, 100, null, 300, null}
-    auto elements = make_counting_transform_iterator(0, [](auto i){ return i; });
-    auto validity = make_counting_transform_iterator(0, [](auto i){ return i % 2; });
-    fixed_point_column_wrapper<int32_t> w(elements, elements + 5, validity, 2);
+// Creates a non-nullable column of 4 DECIMAL32 elements of scale 3: {1000, 2000, 3000, 4000}
+auto elements = cudf::detail::make_counting_transform_iterator(0, [](auto i){ return i; });
+cudf::test::fixed_point_column_wrapper<int32_t> w(elements, elements + 4, 3);
+
+// Creates a nullable column of 5 DECIMAL32 elements of scale 2: {null, 100, null, 300, null}
+auto elements = cudf::detail::make_counting_transform_iterator(0, [](auto i){ return i; });
+auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i){ return i % 2; });
+cudf::test::fixed_point_column_wrapper<int32_t> w(elements, elements + 5, validity, 2);
 ```
 
 #### dictionary_column_wrapper
 
-The `dictionary_column_wrapper` class should be used to create dictionary columns.
-`dictionary_column_wrapper` provides constructors that accept an iterator range to generate each
+The `cudf::test::dictionary_column_wrapper` class should be used to create dictionary columns.
+`cudf::test::dictionary_column_wrapper` provides constructors that accept an iterator range to generate each
 element in the column. For nullable columns, an additional iterator can be provided to indicate the
 validity of each element. There are also constructors that accept a `std::initializer_list<T>` for
 the column elements and optionally for the validity of each element.
@@ -239,43 +245,43 @@ Example:
 // Creates a non-nullable dictionary column of INT32 elements with 5 elements
 // keys = {0, 2, 6}, indices = {0, 1, 1, 2, 2}
 std::vector<int32_t> elements{0, 2, 2, 6, 6};
-dictionary_column_wrapper<int32_t> w(element.begin(), elements.end());
+cudf::test::dictionary_column_wrapper<int32_t> w(elements.begin(), elements.end());
 
 // Creates a nullable dictionary column with 5 elements and a validity iterator.
 std::vector<int32_t> elements{0, 2, 0, 6, 0};
 // Validity iterator here sets even rows to null.
-auto validity = make_counting_transform_iterator(0, [](auto i){return i % 2;})
+auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i){return i % 2;});
 // keys = {2, 6}, indices = {NULL, 0, NULL, 1, NULL}
-dictionary_column_wrapper<int32_t> w(elements, elements + 5, validity);
+cudf::test::dictionary_column_wrapper<int32_t> w(elements.begin(), elements.end(), validity);
 
 // Creates a non-nullable dictionary column with 4 elements.
 // keys = {1, 2, 3}, indices = {0, 1, 2, 0}
-dictionary_column_wrapper<int32_t> w{{1, 2, 3, 1}};
+cudf::test::dictionary_column_wrapper<int32_t> w{{1, 2, 3, 1}};
 
 // Creates a nullable dictionary column with 4 elements and validity initializer.
 // keys = {1, 3}, indices = {0, NULL, 1, NULL}
-dictionary_column_wrapper<int32_t> w{ {1, 0, 3, 0}, {1, 0, 1, 0}};
+cudf::test::dictionary_column_wrapper<int32_t> w{ {1, 0, 3, 0}, {1, 0, 1, 0}};
 
 // Creates a nullable column of dictionary elements with 5 elements and validity initializer.
 std::vector<int32_t> elements{0, 2, 2, 6, 6};
 // keys = {2, 6}, indices = {NULL, 0, NULL, 1, NULL}
-dictionary_width_column_wrapper<int32_t> w(elements, elements + 5, {0, 1, 0, 1, 0});
+cudf::test::dictionary_column_wrapper<int32_t> w(elements.begin(), elements.end(), {0, 1, 0, 1, 0});
 
 // Creates a non-nullable dictionary column with 7 string elements
 std::vector<std::string> strings{"", "aaa", "bbb", "aaa", "bbb", "ccc", "bbb"};
 // keys = {"","aaa","bbb","ccc"}, indices = {0, 1, 2, 1, 2, 3, 2}
-dictionary_column_wrapper<std::string> d(strings.begin(), strings.end());
+cudf::test::dictionary_column_wrapper<std::string> d(strings.begin(), strings.end());
 
 // Creates a nullable dictionary column with 7 string elements and a validity iterator.
 // Validity iterator here sets even rows to null.
 // keys = {"a", "bb"}, indices = {NULL, 1, NULL, 1, NULL, 0, NULL}
-auto validity = make_counting_transform_iterator(0, [](auto i){return i % 2;});
-dictionary_column_wrapper<std::string> d({"", "bb", "", "bb", "", "a", ""}, validity);
+auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i){return i % 2;});
+cudf::test::dictionary_column_wrapper<std::string> d({"", "bb", "", "bb", "", "a", ""}, validity);
 ```
 
 #### strings_column_wrapper
 
-The `strings_column_wrapper` class should be used to create columns of strings. It provides
+The `cudf::test::strings_column_wrapper` class should be used to create columns of strings. It provides
 constructors that accept an iterator range to generate each string in the column. For nullable
 columns, an additional iterator can be provided to indicate the validity of each string. There are
 also constructors that accept a `std::initializer_list<std::string>` for the column's strings and
@@ -287,27 +293,27 @@ Example:
 // Creates a non-nullable STRING column with 7 string elements:
 // {"", "this", "is", "a", "column", "of", "strings"}
 std::vector<std::string> strings{"", "this", "is", "a", "column", "of", "strings"};
-strings_column_wrapper s(strings.begin(), strings.end());
+cudf::test::strings_column_wrapper s(strings.begin(), strings.end());
 
 // Creates a nullable STRING column with 7 string elements:
 // {NULL, "this", NULL, "a", NULL, "of", NULL}
 std::vector<std::string> strings{"", "this", "is", "a", "column", "of", "strings"};
-auto validity = make_counting_transform_iterator(0, [](auto i){return i % 2;});
-strings_column_wrapper s(strings.begin(), strings.end(), validity);
+auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i){return i % 2;});
+cudf::test::strings_column_wrapper s(strings.begin(), strings.end(), validity);
 
 // Creates a non-nullable STRING column with 7 string elements:
 // {"", "this", "is", "a", "column", "of", "strings"}
-strings_column_wrapper s({"", "this", "is", "a", "column", "of", "strings"});
+cudf::test::strings_column_wrapper s({"", "this", "is", "a", "column", "of", "strings"});
 
 // Creates a nullable STRING column with 7 string elements:
 // {NULL, "this", NULL, "a", NULL, "of", NULL}
-auto validity = make_counting_transform_iterator(0, [](auto i){return i % 2;});
-strings_column_wrapper s({"", "this", "is", "a", "column", "of", "strings"}, validity);
+auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i){return i % 2;});
+cudf::test::strings_column_wrapper s({"", "this", "is", "a", "column", "of", "strings"}, validity);
 ```
 
 #### lists_column_wrapper
 
-The `lists_column_wrapper` class should be used to create columns of lists. It provides
+The `cudf::test::lists_column_wrapper` class should be used to create columns of lists. It provides
 constructors that accept an iterator range to generate each list in the column. For nullable
 columns, an additional iterator can be provided to indicate the validity of each list. There are
 also constructors that accept a `std::initializer_list<T>` for the column's lists and
@@ -318,50 +324,50 @@ Example:
 ```c++
 // Creates an empty LIST column
 // []
-lists_column_wrapper l{};
+cudf::test::lists_column_wrapper l{};
 
 // Creates a LIST column with 1 list composed of 2 total integers
 // [{0, 1}]
-lists_column_wrapper l{0, 1};
+cudf::test::lists_column_wrapper l{0, 1};
 
 // Creates a LIST column with 3 lists
 // [{0, 1}, {2, 3}, {4, 5}]
-lists_column_wrapper l{ {0, 1}, {2, 3}, {4, 5} };
+cudf::test::lists_column_wrapper l{ {0, 1}, {2, 3}, {4, 5} };
 
 // Creates a LIST of LIST columns with 2 lists on the top level and
 // 4 below
 // [ {{0, 1}, {2, 3}}, {{4, 5}, {6, 7}} ]
-lists_column_wrapper l{ {{0, 1}, {2, 3}}, {{4, 5}, {6, 7}} };
+cudf::test::lists_column_wrapper l{ {{0, 1}, {2, 3}}, {{4, 5}, {6, 7}} };
 
 // Creates a LIST column with 1 list composed of 5 total integers
 // [{0, 1, 2, 3, 4}]
-auto elements = make_counting_transform_iterator(0, [](auto i){return i*2;});
-lists_column_wrapper l(elements, elements+5);
+auto elements = cudf::detail::make_counting_transform_iterator(0, [](auto i){return i*2;});
+cudf::test::lists_column_wrapper l(elements, elements+5);
 
 // Creates a LIST column with 1 lists composed of 2 total integers
 // [{0, NULL}]
-auto validity = make_counting_transform_iterator(0, [](auto i){return i % 2;});
-lists_column_wrapper l{{0, 1}, validity};
+auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i){return i % 2;});
+cudf::test::lists_column_wrapper l{{0, 1}, validity};
 
 // Creates a LIST column with 1 lists composed of 5 total integers
 // [{0, NULL, 2, NULL, 4}]
-auto elements = make_counting_transform_iterator(0, [](auto i){return i*2;});
-auto validity = make_counting_transform_iterator(0, [](auto i){return i % 2;});
-lists_column_wrapper l(elements, elements+5, validity);
+auto elements = cudf::detail::make_counting_transform_iterator(0, [](auto i){return i*2;});
+auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i){return i % 2;});
+cudf::test::lists_column_wrapper l(elements, elements+5, validity);
 
 // Creates a LIST column with 1 list composed of 2 total strings
 // [{"abc", "def"}]
-lists_column_wrapper l{"abc", "def"};
+cudf::test::lists_column_wrapper l{"abc", "def"};
 
 // Creates a LIST of LIST columns with 2 lists on the top level and 4 below
 // [ {{0, 1}, NULL}, {{4, 5}, NULL} ]
-auto validity = make_counting_transform_iterator(0, [](auto i){return i % 2;});
-lists_column_wrapper l{ {{{0, 1}, {2, 3}}, validity}, {{{4, 5}, {6, 7}}, validity} };
+auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i){return i % 2;});
+cudf::test::lists_column_wrapper l{ {{{0, 1}, {2, 3}}, validity}, {{{4, 5}, {6, 7}}, validity} };
 ```
 
 #### structs_column_wrapper
 
-The `structs_column_wrapper` class should be used to create columns of structs. It provides
+The `cudf::test::structs_column_wrapper` class should be used to create columns of structs. It provides
 constructors that accept a vector or initializer list of pre-constructed columns or column wrappers
 for child columns. For nullable columns, an additional iterator can be provided to indicate the
 validity of each struct.
@@ -370,41 +376,41 @@ Examples:
 
 ```c++
 // The following constructs a column for struct< int, string >.
-auto child_int_col = fixed_width_column_wrapper<int32_t>{ 1, 2, 3, 4, 5 }.release();
-auto child_string_col = string_column_wrapper {"All", "the", "leaves", "are", "brown"}.release();
+auto child_int_col = cudf::test::fixed_width_column_wrapper<int32_t>{ 1, 2, 3, 4, 5 }.release();
+auto child_string_col = cudf::test::strings_column_wrapper {"All", "the", "leaves", "are", "brown"}.release();
 
-std::vector<std::unique_ptr<column>> child_columns;
+std::vector<std::unique_ptr<cudf::column>> child_columns;
 child_columns.push_back(std::move(child_int_col));
 child_columns.push_back(std::move(child_string_col));
 
-struct_column_wrapper struct_column_wrapper{
+cudf::test::structs_column_wrapper wrapper{
   child_cols,
   {1,0,1,0,1} // Validity
 };
 
-auto struct_col {struct_column_wrapper.release()};
+auto struct_col {wrapper.release()};
 
 // The following constructs a column for struct< int, string >.
-fixed_width_column_wrapper<int32_t> child_int_col_wrapper{ 1, 2, 3, 4, 5 };
-string_column_wrapper child_string_col_wrapper {"All", "the", "leaves", "are", "brown"};
+cudf::test::fixed_width_column_wrapper<int32_t> child_int_col_wrapper{ 1, 2, 3, 4, 5 };
+cudf::test::strings_column_wrapper child_string_col_wrapper {"All", "the", "leaves", "are", "brown"};
 
-struct_column_wrapper struct_column_wrapper{
+cudf::test::structs_column_wrapper wrapper{
   {child_int_col_wrapper, child_string_col_wrapper}
   {1,0,1,0,1} // Validity
 };
 
-auto struct_col {struct_column_wrapper.release()};
+auto struct_col {wrapper.release()};
 
 // The following constructs a column for struct< int, string >.
-fixed_width_column_wrapper<int32_t> child_int_col_wrapper{ 1, 2, 3, 4, 5 };
-string_column_wrapper child_string_col_wrapper {"All", "the", "leaves", "are", "brown"};
+cudf::test::fixed_width_column_wrapper<int32_t> child_int_col_wrapper{ 1, 2, 3, 4, 5 };
+cudf::test::strings_column_wrapper child_string_col_wrapper {"All", "the", "leaves", "are", "brown"};
 
-struct_column_wrapper struct_column_wrapper{
+cudf::test::structs_column_wrapper wrapper{
   {child_int_col_wrapper, child_string_col_wrapper}
   cudf::detail::make_counting_transform_iterator(0, [](auto i){ return i % 2; }) // Validity
 };
 
-auto struct_col {struct_column_wrapper.release()};
+auto struct_col {wrapper.release()};
 ```
 
 ### Column Comparison Utilities

From dec8bde1d5cc7462e52535a4e26f2c1be507a237 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 21 Oct 2022 07:45:01 -0700
Subject: [PATCH 057/202] Add tests ensuring that cudf's default stream is
 always used (#11875)

This PR ensures that cudf's default stream is properly passed to all kernel launches so that nothing implicitly runs on the CUDA default stream. It adds a small library that is built during the tests and overloads CUDA functions to throw an exception when usage of the default stream is detected. It also fixes all remaining usage of anything other than cudf's default stream (I fixed most of the issues in previous PRs, but I found a few others when finalizing this one).

Resolves #11929
Resolves #11942

### Important notes for reviewers:
- **The changeset is deceptively large.** The vast majority of the changes are just a global find-and-replace of `cudf::get_default_stream()` for `cudf::default_stream_value`, as well as a few smaller fixes such as missing `CUDF_TEST_PROGRAM_MAIN` in a couple of tests and usage of `rmm::cuda_stream_default`. The meaningful changes are:
    - The new default stream getter/setter in `default_stream.[hpp|cpp]`
    - The addition of `cpp/tests/utilities/identify_stream_usage`
    - The changes to the base testing fixture in `cpp/include/cudf_test/base_fixture.hpp` to inject the custom stream.
    - The changes to CI in `ci/gpu/build.sh` to build and use the new library.
- This PR is a breaking change because it moves the default stream into the detail namespace. Going forward the default stream may only be accessed using the public accessor `cudf::get_default_stream()`. I have added a corresponding setter, but it is also in the detail namespace since I do not want to publicly support changing the default stream yet, only for the purpose of testing. Reviewers, please leave comments if you disagree with those choices.
- I have made getting and setting the default stream thread-safe, but there is still only a single stream. In multi-threaded applications we may want to support a stream per thread so that users could manually achieve PTDS with more fine-tuned control. Is this worthwhile? Even if it is, I'm inclined to wait for a subsequent PR to implement this unless someone feels strongly otherwise.
- I'm currently only overloading `cudaLaunchKernel`. I can add overloads for other functions as well, but I didn't want to go through the effort of overloading every possible API. If reviewers have a minimal set that they'd like to see overloaded, let me know. [I've included links to all the relevant pages of the CUDA runtime API in the identify_stream_usage.cu file](https://github.com/rapidsai/cudf/pull/11875/files#diff-0b2762207c27c080acd2114475c7a1c06377a7c18c4e9c3de60ecbdc82a4dc61R99) if someone wants to look through them.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Jason Lowe (https://github.com/jlowe)
  - Bradley Dice (https://github.com/bdice)
  - Sevag H (https://github.com/sevagh)
  - https://github.com/brandon-b-miller
  - Jake Hemstad (https://github.com/jrhemstad)
  - David Wendt (https://github.com/davidwendt)

URL: https://github.com/rapidsai/cudf/pull/11875
---
 ci/gpu/build.sh                               |  17 +-
 ci/release/update-version.sh                  |   3 +
 conda/recipes/libcudf/meta.yaml               |   2 +
 cpp/benchmarks/column/concatenate.cpp         |   6 +-
 cpp/benchmarks/common/generate_input.cu       |  20 +-
 .../common/random_distribution_factory.cuh    |   6 +-
 cpp/benchmarks/copying/copy_if_else.cpp       |   2 +-
 cpp/benchmarks/copying/shift.cu               |   2 +-
 cpp/benchmarks/groupby/group_max.cpp          |   2 +-
 cpp/benchmarks/groupby/group_nunique.cpp      |   2 +-
 cpp/benchmarks/groupby/group_struct_keys.cpp  |   2 +-
 cpp/benchmarks/hashing/hash.cpp               |   2 +-
 cpp/benchmarks/io/csv/csv_reader_input.cpp    |   2 +-
 cpp/benchmarks/io/csv/csv_reader_options.cpp  |   2 +-
 cpp/benchmarks/io/json/nested_json.cpp        |   6 +-
 cpp/benchmarks/io/orc/orc_reader_input.cpp    |   2 +-
 cpp/benchmarks/io/orc/orc_reader_options.cpp  |   2 +-
 cpp/benchmarks/io/orc/orc_writer.cpp          |   6 +-
 cpp/benchmarks/io/orc/orc_writer_chunks.cpp   |   4 +-
 .../io/parquet/parquet_reader_input.cpp       |   2 +-
 .../io/parquet/parquet_reader_options.cpp     |   2 +-
 cpp/benchmarks/io/parquet/parquet_writer.cpp  |   6 +-
 .../io/parquet/parquet_writer_chunks.cpp      |   4 +-
 cpp/benchmarks/io/text/multibyte_split.cpp    |   4 +-
 cpp/benchmarks/iterator/iterator.cu           |   2 +-
 cpp/benchmarks/join/generate_input_tables.cuh |   2 +-
 cpp/benchmarks/join/join_common.hpp           |   4 +-
 cpp/benchmarks/lists/copying/scatter_lists.cu |   2 +-
 cpp/benchmarks/quantiles/quantiles.cpp        |   2 +-
 cpp/benchmarks/reduction/segment_reduce.cu    |   2 +-
 cpp/benchmarks/sort/rank.cpp                  |   2 +-
 cpp/benchmarks/sort/sort.cpp                  |   2 +-
 cpp/benchmarks/sort/sort_strings.cpp          |   2 +-
 cpp/benchmarks/stream_compaction/distinct.cpp |   4 +-
 cpp/benchmarks/stream_compaction/unique.cpp   |   2 +-
 cpp/benchmarks/string/case.cpp                |   2 +-
 cpp/benchmarks/string/combine.cpp             |   2 +-
 cpp/benchmarks/string/contains.cpp            |   2 +-
 cpp/benchmarks/string/copy.cu                 |   2 +-
 cpp/benchmarks/string/factory.cu              |   4 +-
 cpp/benchmarks/string/filter.cpp              |   2 +-
 cpp/benchmarks/string/find.cpp                |   2 +-
 cpp/benchmarks/string/like.cpp                |   2 +-
 cpp/benchmarks/string/repeat_strings.cpp      |   8 +-
 cpp/benchmarks/string/replace.cpp             |   2 +-
 cpp/benchmarks/string/replace_re.cpp          |   2 +-
 cpp/benchmarks/string/split.cpp               |   2 +-
 cpp/benchmarks/string/substring.cpp           |   2 +-
 cpp/benchmarks/string/translate.cpp           |   2 +-
 cpp/benchmarks/string/url_decode.cu           |   2 +-
 .../synchronization/synchronization.hpp       |   4 +-
 cpp/benchmarks/text/normalize.cpp             |   2 +-
 cpp/benchmarks/text/normalize_spaces.cpp      |   2 +-
 cpp/benchmarks/text/tokenize.cpp              |   2 +-
 .../type_dispatcher/type_dispatcher.cu        |   4 +-
 cpp/include/cudf/binaryop.hpp                 |   4 +-
 cpp/include/cudf/column/column.hpp            |   6 +-
 .../cudf/column/column_device_view.cuh        |   4 +-
 cpp/include/cudf/column/column_factories.hpp  |  34 +-
 cpp/include/cudf/detail/binaryop.hpp          |   8 +-
 cpp/include/cudf/detail/concatenate.hpp       |   4 +-
 cpp/include/cudf/detail/copy.hpp              |  42 +--
 cpp/include/cudf/detail/copy_if.cuh           |   2 +-
 cpp/include/cudf/detail/copy_range.cuh        |   6 +-
 cpp/include/cudf/detail/datetime.hpp          |  32 +-
 cpp/include/cudf/detail/fill.hpp              |   4 +-
 cpp/include/cudf/detail/gather.cuh            |   2 +-
 cpp/include/cudf/detail/gather.hpp            |   4 +-
 .../detail/groupby/group_replace_nulls.hpp    |   2 +-
 cpp/include/cudf/detail/hashing.hpp           |   8 +-
 cpp/include/cudf/detail/interop.hpp           |   8 +-
 cpp/include/cudf/detail/is_element_valid.hpp  |   2 +-
 cpp/include/cudf/detail/join.hpp              |   2 +-
 cpp/include/cudf/detail/label_bins.hpp        |   2 +-
 cpp/include/cudf/detail/null_mask.hpp         |   4 +-
 cpp/include/cudf/detail/quantiles.hpp         |   6 +-
 .../cudf/detail/reduction_functions.hpp       |  42 +--
 cpp/include/cudf/detail/repeat.hpp            |   4 +-
 cpp/include/cudf/detail/replace.hpp           |  14 +-
 cpp/include/cudf/detail/reshape.hpp           |   4 +-
 cpp/include/cudf/detail/rolling.hpp           |   2 +-
 cpp/include/cudf/detail/round.hpp             |   2 +-
 cpp/include/cudf/detail/scatter.cuh           |   2 +-
 cpp/include/cudf/detail/scatter.hpp           |  10 +-
 cpp/include/cudf/detail/sequence.hpp          |   6 +-
 cpp/include/cudf/detail/sorting.hpp           |  18 +-
 cpp/include/cudf/detail/stream_compaction.hpp |  22 +-
 cpp/include/cudf/detail/structs/utilities.hpp |   4 +-
 cpp/include/cudf/detail/tdigest/tdigest.hpp   |   6 +-
 cpp/include/cudf/detail/transform.hpp         |  16 +-
 cpp/include/cudf/detail/transpose.hpp         |   2 +-
 cpp/include/cudf/detail/unary.hpp             |  10 +-
 cpp/include/cudf/detail/utilities/cuda.cuh    |   2 +-
 .../cudf/detail/utilities/default_stream.hpp  |  36 ++
 .../detail/utilities/vector_factories.hpp     |  18 +-
 cpp/include/cudf/detail/valid_if.cuh          |   2 +-
 .../cudf/dictionary/detail/concatenate.hpp    |   2 +-
 cpp/include/cudf/dictionary/detail/encode.hpp |   4 +-
 .../cudf/dictionary/detail/replace.hpp        |   4 +-
 cpp/include/cudf/dictionary/detail/search.hpp |   4 +-
 .../cudf/dictionary/detail/update_keys.hpp    |  12 +-
 .../cudf/dictionary/dictionary_factories.hpp  |   4 +-
 cpp/include/cudf/io/detail/avro.hpp           |   2 +-
 cpp/include/cudf/io/detail/csv.hpp            |   2 +-
 cpp/include/cudf/io/detail/json.hpp           |   2 +-
 cpp/include/cudf/io/detail/orc.hpp            |   2 +-
 cpp/include/cudf/join.hpp                     |  14 +-
 cpp/include/cudf/lists/detail/concatenate.hpp |   2 +-
 cpp/include/cudf/lists/detail/gather.cuh      |   2 +-
 cpp/include/cudf/lists/detail/scatter.cuh     |   6 +-
 .../cudf/lists/lists_column_factories.hpp     |   2 +-
 cpp/include/cudf/partitioning.hpp             |   2 +-
 cpp/include/cudf/scalar/scalar.hpp            |  80 ++---
 cpp/include/cudf/scalar/scalar_factories.hpp  |  24 +-
 cpp/include/cudf/strings/detail/combine.hpp   |   4 +-
 .../cudf/strings/detail/concatenate.hpp       |   2 +-
 cpp/include/cudf/strings/detail/copying.hpp   |   2 +-
 cpp/include/cudf/strings/detail/fill.hpp      |   2 +-
 cpp/include/cudf/strings/detail/json.hpp      |   2 +-
 cpp/include/cudf/strings/detail/replace.hpp   |   8 +-
 cpp/include/cudf/strings/detail/scatter.cuh   |   2 +-
 cpp/include/cudf/strings/detail/utilities.cuh |   6 +-
 cpp/include/cudf/strings/detail/utilities.hpp |   4 +-
 .../cudf/table/experimental/row_operators.cuh |   4 +-
 cpp/include/cudf/table/table.hpp              |   2 +-
 cpp/include/cudf/table/table_device_view.cuh  |   4 +-
 cpp/include/cudf/utilities/default_stream.hpp |  14 +-
 cpp/include/cudf_test/base_fixture.hpp        |  36 +-
 cpp/include/cudf_test/column_utilities.hpp    |   4 +-
 cpp/include/cudf_test/column_wrapper.hpp      |  12 +-
 .../stream_checking_resource_adapter.hpp      | 166 +++++++++
 cpp/include/cudf_test/tdigest_utilities.cuh   |   6 +-
 cpp/include/nvtext/bpe_tokenize.hpp           |   4 +-
 cpp/include/nvtext/detail/tokenize.hpp        |   8 +-
 cpp/src/binaryop/binaryop.cpp                 |   8 +-
 cpp/src/binaryop/compiled/binary_ops.hpp      |  12 +-
 .../binaryop/compiled/struct_binary_ops.cuh   |   4 +-
 cpp/src/bitmask/null_mask.cu                  |  10 +-
 cpp/src/column/column.cu                      |   4 +-
 cpp/src/column/column_view.cpp                |   4 +-
 cpp/src/copying/concatenate.cu                |   6 +-
 cpp/src/copying/contiguous_split.cu           |   2 +-
 cpp/src/copying/copy.cpp                      |   4 +-
 cpp/src/copying/copy.cu                       |   8 +-
 cpp/src/copying/copy_range.cu                 |   4 +-
 cpp/src/copying/gather.cu                     |   2 +-
 cpp/src/copying/get_element.cu                |   2 +-
 cpp/src/copying/pack.cpp                      |   2 +-
 cpp/src/copying/purge_nonempty_nulls.cu       |   6 +-
 cpp/src/copying/reverse.cu                    |   4 +-
 cpp/src/copying/sample.cu                     |   2 +-
 cpp/src/copying/scatter.cu                    |   8 +-
 cpp/src/copying/shift.cu                      |   2 +-
 cpp/src/copying/slice.cu                      |   8 +-
 cpp/src/copying/split.cpp                     |   8 +-
 cpp/src/datetime/datetime_ops.cu              |  40 +--
 cpp/src/dictionary/add_keys.cu                |   2 +-
 cpp/src/dictionary/decode.cu                  |   2 +-
 cpp/src/dictionary/encode.cu                  |   2 +-
 cpp/src/dictionary/remove_keys.cu             |  10 +-
 cpp/src/dictionary/search.cu                  |   4 +-
 cpp/src/dictionary/set_keys.cu                |   4 +-
 cpp/src/filling/calendrical_month_sequence.cu |   2 +-
 cpp/src/filling/fill.cu                       |   4 +-
 cpp/src/filling/repeat.cu                     |   4 +-
 cpp/src/filling/sequence.cu                   |   4 +-
 cpp/src/groupby/groupby.cu                    |  10 +-
 cpp/src/hash/concurrent_unordered_map.cuh     |  12 +-
 cpp/src/hash/hash_allocator.cuh               |   8 +-
 cpp/src/hash/hashing.cu                       |   2 +-
 cpp/src/interop/dlpack.cpp                    |   4 +-
 cpp/src/interop/from_arrow.cu                 |   2 +-
 cpp/src/interop/to_arrow.cu                   |   4 +-
 cpp/src/io/fst/logical_stack.cuh              |   2 +-
 cpp/src/io/functions.cpp                      |  22 +-
 cpp/src/io/json/json_column.cu                |   6 +-
 cpp/src/io/json/json_tree.cu                  |  10 +-
 cpp/src/io/json/nested_json_gpu.cu            |   2 +-
 cpp/src/io/orc/timezone.cuh                   |   2 +-
 cpp/src/io/text/bgzip_data_chunk_source.cu    |   6 +-
 cpp/src/io/text/multibyte_split.cu            |  20 +-
 cpp/src/io/utilities/hostdevice_vector.hpp    |   2 +-
 cpp/src/join/conditional_join.cu              |  18 +-
 cpp/src/join/conditional_join.hpp             |   4 +-
 cpp/src/join/cross_join.cu                    |   2 +-
 cpp/src/join/join.cu                          |   6 +-
 cpp/src/join/mixed_join.cu                    |  10 +-
 cpp/src/join/mixed_join_semi.cu               |   8 +-
 cpp/src/join/semi_join.cu                     |   4 +-
 cpp/src/labeling/label_bins.cu                |   2 +-
 .../combine/concatenate_list_elements.cu      |   2 +-
 cpp/src/lists/combine/concatenate_rows.cu     |   2 +-
 cpp/src/lists/contains.cu                     |  10 +-
 cpp/src/lists/copying/segmented_gather.cu     |   2 +-
 cpp/src/lists/count_elements.cu               |   2 +-
 cpp/src/lists/explode.cu                      |   8 +-
 cpp/src/lists/extract.cu                      |   4 +-
 cpp/src/lists/segmented_sort.cu               |   4 +-
 cpp/src/lists/sequences.cu                    |   4 +-
 cpp/src/lists/set_operations.cu               |   8 +-
 .../stream_compaction/apply_boolean_mask.cu   |   2 +-
 cpp/src/lists/stream_compaction/distinct.cu   |   2 +-
 cpp/src/merge/merge.cu                        |   4 +-
 cpp/src/partitioning/partitioning.cu          |   2 +-
 cpp/src/partitioning/round_robin.cu           |   4 +-
 cpp/src/quantiles/quantile.cu                 |   2 +-
 cpp/src/quantiles/quantiles.cu                |   6 +-
 cpp/src/quantiles/tdigest/tdigest.cu          |   2 +-
 cpp/src/reductions/minmax.cu                  |   2 +-
 cpp/src/reductions/reductions.cpp             |   6 +-
 cpp/src/reductions/scan/scan.cpp              |   2 +-
 cpp/src/reductions/segmented_reductions.cpp   |   4 +-
 cpp/src/replace/clamp.cu                      |   4 +-
 cpp/src/replace/nans.cu                       |   8 +-
 cpp/src/replace/nulls.cu                      |   6 +-
 cpp/src/replace/replace.cu                    |   2 +-
 cpp/src/reshape/byte_cast.cu                  |   2 +-
 cpp/src/reshape/interleave_columns.cu         |   2 +-
 cpp/src/reshape/tile.cu                       |   2 +-
 .../rolling/detail/range_window_bounds.hpp    |   2 +-
 cpp/src/rolling/grouped_rolling.cu            |   8 +-
 cpp/src/rolling/rolling.cu                    |   6 +-
 cpp/src/round/round.cu                        |   2 +-
 cpp/src/scalar/scalar.cpp                     |   6 +-
 cpp/src/search/contains_column.cu             |   2 +-
 cpp/src/search/contains_scalar.cu             |   2 +-
 cpp/src/search/search_ordered.cu              |   4 +-
 cpp/src/sort/is_sorted.cu                     |   2 +-
 cpp/src/sort/rank.cu                          |   2 +-
 cpp/src/sort/segmented_sort.cu                |   8 +-
 cpp/src/sort/sort.cu                          |   8 +-
 cpp/src/sort/stable_sort.cu                   |   4 +-
 .../stream_compaction/apply_boolean_mask.cu   |   2 +-
 cpp/src/stream_compaction/distinct.cu         |   2 +-
 cpp/src/stream_compaction/drop_nans.cu        |   4 +-
 cpp/src/stream_compaction/drop_nulls.cu       |   4 +-
 cpp/src/stream_compaction/unique.cu           |   2 +-
 cpp/src/strings/attributes.cu                 |   6 +-
 cpp/src/strings/capitalize.cu                 |   6 +-
 cpp/src/strings/case.cu                       |   6 +-
 cpp/src/strings/char_types/char_types.cu      |   4 +-
 cpp/src/strings/combine/concatenate.cu        |   4 +-
 cpp/src/strings/combine/join.cu               |   2 +-
 cpp/src/strings/combine/join_list_elements.cu |   4 +-
 cpp/src/strings/contains.cu                   |   6 +-
 cpp/src/strings/convert/convert_booleans.cu   |   4 +-
 cpp/src/strings/convert/convert_datetime.cu   |   6 +-
 cpp/src/strings/convert/convert_durations.cu  |   4 +-
 .../strings/convert/convert_fixed_point.cu    |   6 +-
 cpp/src/strings/convert/convert_floats.cu     |   6 +-
 cpp/src/strings/convert/convert_hex.cu        |   6 +-
 cpp/src/strings/convert/convert_integers.cu   |   8 +-
 cpp/src/strings/convert/convert_ipv4.cu       |   6 +-
 cpp/src/strings/convert/convert_lists.cu      |   2 +-
 cpp/src/strings/convert/convert_urls.cu       |   4 +-
 cpp/src/strings/extract/extract.cu            |   2 +-
 cpp/src/strings/extract/extract_all.cu        |   2 +-
 cpp/src/strings/filter_chars.cu               |   2 +-
 cpp/src/strings/json/json_path.cu             |   2 +-
 cpp/src/strings/like.cu                       |   2 +-
 cpp/src/strings/padding.cu                    |   6 +-
 cpp/src/strings/repeat_strings.cu             |   8 +-
 cpp/src/strings/replace/backref_re.cu         |   2 +-
 cpp/src/strings/replace/multi_re.cu           |   2 +-
 cpp/src/strings/replace/replace.cu            |   6 +-
 cpp/src/strings/replace/replace_re.cu         |   4 +-
 cpp/src/strings/search/find.cu                |  20 +-
 cpp/src/strings/search/find_multiple.cu       |   2 +-
 cpp/src/strings/search/findall.cu             |   2 +-
 cpp/src/strings/split/partition.cu            |   8 +-
 cpp/src/strings/split/split.cu                |   8 +-
 cpp/src/strings/split/split_re.cu             |   8 +-
 cpp/src/strings/split/split_record.cu         |   6 +-
 cpp/src/strings/strings_column_factories.cu   |   2 +-
 cpp/src/strings/strip.cu                      |   4 +-
 cpp/src/strings/substring.cu                  |  10 +-
 cpp/src/strings/translate.cu                  |   2 +-
 cpp/src/strings/wrap.cu                       |   2 +-
 cpp/src/text/detokenize.cu                    |   2 +-
 cpp/src/text/edit_distance.cu                 |   4 +-
 cpp/src/text/generate_ngrams.cu               |   6 +-
 cpp/src/text/ngrams_tokenize.cu               |   4 +-
 cpp/src/text/normalize.cu                     |   4 +-
 cpp/src/text/replace.cu                       |   4 +-
 cpp/src/text/stemmer.cu                       |   6 +-
 cpp/src/text/subword/bpe_tokenizer.cu         |   2 +-
 cpp/src/text/subword/load_hash_file.cu        |   2 +-
 cpp/src/text/subword/load_merges_file.cu      |   2 +-
 cpp/src/text/subword/subword_tokenize.cu      |   2 +-
 cpp/src/text/tokenize.cu                      |  10 +-
 cpp/src/transform/bools_to_mask.cu            |   2 +-
 cpp/src/transform/compute_column.cu           |   2 +-
 cpp/src/transform/encode.cu                   |   2 +-
 cpp/src/transform/mask_to_bools.cu            |   2 +-
 cpp/src/transform/nans_to_nulls.cu            |   2 +-
 cpp/src/transform/one_hot_encode.cu           |   2 +-
 cpp/src/transform/row_bit_count.cu            |   2 +-
 cpp/src/transform/transform.cpp               |   2 +-
 cpp/src/transpose/transpose.cu                |   2 +-
 cpp/src/unary/cast_ops.cu                     |   2 +-
 cpp/src/unary/math_ops.cu                     |   2 +-
 cpp/src/unary/nan_ops.cu                      |   4 +-
 cpp/src/unary/null_ops.cu                     |   4 +-
 cpp/src/utilities/default_stream.cpp          |  13 +-
 cpp/tests/bitmask/bitmask_tests.cpp           | 242 ++++++-------
 cpp/tests/bitmask/set_nullmask_tests.cu       |   2 +-
 cpp/tests/column/column_device_view_test.cu   |   4 +-
 cpp/tests/column/column_test.cu               |  44 +--
 cpp/tests/column/compound_test.cu             |  34 +-
 cpp/tests/column/factories_test.cpp           |   2 +-
 cpp/tests/copying/concatenate_tests.cu        |  24 +-
 cpp/tests/copying/detail_gather_tests.cu      |   4 +-
 cpp/tests/copying/scatter_list_tests.cpp      |   4 +-
 cpp/tests/copying/shift_tests.cpp             |   4 +-
 .../device_atomics/device_atomics_test.cu     |  10 +-
 cpp/tests/error/error_handling_test.cu        |   9 +-
 cpp/tests/fixed_point/fixed_point_tests.cu    |   8 +-
 cpp/tests/groupby/lists_tests.cu              |   4 +-
 cpp/tests/groupby/tdigest_tests.cu            |   6 +-
 cpp/tests/hash_map/map_test.cu                |  30 +-
 cpp/tests/io/comp/decomp_test.cpp             |  10 +-
 cpp/tests/io/json_tree.cpp                    |  14 +-
 cpp/tests/io/json_type_cast_test.cu           |  14 +-
 cpp/tests/io/nested_json_test.cpp             |  20 +-
 cpp/tests/io/text/data_chunk_source_test.cpp  |  18 +-
 cpp/tests/io/type_inference_test.cu           |  73 ++--
 cpp/tests/iterator/iterator_tests.cuh         |  14 +-
 .../optional_iterator_test_numeric.cu         |   6 +-
 .../iterator/pair_iterator_test_numeric.cu    |   2 +-
 cpp/tests/join/conditional_join_tests.cu      |  26 +-
 cpp/tests/join/join_tests.cpp                 |   4 +-
 cpp/tests/join/mixed_join_tests.cu            |  10 +-
 cpp/tests/quantiles/percentile_approx_test.cu |   8 +-
 cpp/tests/quantiles/tdigest_utilities.cu      |  12 +-
 .../reductions/segmented_reduction_tests.cpp  | 134 +++++---
 cpp/tests/replace/replace_nulls_tests.cpp     |   4 +-
 cpp/tests/scalar/factories_test.cpp           |   2 +-
 cpp/tests/scalar/scalar_device_view_test.cu   |  22 +-
 .../apply_boolean_mask_tests.cpp              |   2 +-
 cpp/tests/strings/datetime_tests.cpp          |  23 +-
 cpp/tests/strings/factories_test.cu           |  20 +-
 .../table/experimental_row_operator_tests.cu  |   8 +-
 cpp/tests/table/table_view_tests.cu           |   2 +-
 cpp/tests/transform/row_bit_count_test.cu     |  12 +-
 cpp/tests/types/type_dispatcher_test.cu       |   8 +-
 cpp/tests/unary/cast_tests.cpp                |  20 +-
 cpp/tests/utilities/column_utilities.cu       |  62 ++--
 .../identify_stream_usage/CMakeLists.txt      |  60 ++++
 .../identify_stream_usage.cpp                 | 322 ++++++++++++++++++
 .../test_default_stream_identification.cu     |  39 +++
 cpp/tests/utilities_tests/span_tests.cu       |  31 +-
 cpp/tests/wrappers/timestamps_test.cu         |  20 +-
 .../main/native/include/maps_column_view.hpp  |  10 +-
 java/src/main/native/src/ColumnViewJni.cpp    |   2 +-
 java/src/main/native/src/ColumnViewJni.hpp    |   4 +-
 java/src/main/native/src/TableJni.cpp         |   2 +-
 .../main/native/src/aggregation128_utils.hpp  |   4 +-
 java/src/main/native/src/row_conversion.cu    |   2 +-
 java/src/main/native/src/row_conversion.hpp   |   8 +-
 .../cpp/src/strings/udf/udf_apis.cu           |   2 +-
 360 files changed, 2053 insertions(+), 1319 deletions(-)
 create mode 100644 cpp/include/cudf/detail/utilities/default_stream.hpp
 create mode 100644 cpp/include/cudf_test/stream_checking_resource_adapter.hpp
 create mode 100644 cpp/tests/utilities/identify_stream_usage/CMakeLists.txt
 create mode 100644 cpp/tests/utilities/identify_stream_usage/identify_stream_usage.cpp
 create mode 100644 cpp/tests/utilities/identify_stream_usage/test_default_stream_identification.cu

diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh
index 41dac0e5e0f..fc020c4ca1e 100755
--- a/ci/gpu/build.sh
+++ b/ci/gpu/build.sh
@@ -202,11 +202,26 @@ else
     conda list --show-channel-urls
 
     gpuci_logger "GoogleTests"
+
+    # Set up library for finding incorrect default stream usage.
+    cd "$WORKSPACE/cpp/tests/utilities/identify_stream_usage/"
+    mkdir build && cd build && cmake .. -GNinja && ninja && ninja test
+    STREAM_IDENTIFY_LIB="$WORKSPACE/cpp/tests/utilities/identify_stream_usage/build/libidentify_stream_usage.so"
+
     # Run libcudf and libcudf_kafka gtests from libcudf-tests package
     for gt in "$CONDA_PREFIX/bin/gtests/libcudf"*/* ; do
         test_name=$(basename ${gt})
+
         echo "Running GoogleTest $test_name"
-        ${gt} --gtest_output=xml:"$WORKSPACE/test-results/"
+        if [[ ${test_name} == "SPAN_TEST" ]]; then
+            # This one test is specifically designed to test using a thrust device
+            # vector, so we expect and allow it to include default stream usage.
+            gtest_filter="SpanTest.CanConstructFromDeviceContainers"
+            GTEST_CUDF_STREAM_MODE="custom" LD_PRELOAD=${STREAM_IDENTIFY_LIB} ${gt} --gtest_output=xml:"$WORKSPACE/test-results/" --gtest_filter="-${gtest_filter}"
+            ${gt} --gtest_output=xml:"$WORKSPACE/test-results/" --gtest_filter="${gtest_filter}"
+        else
+            GTEST_CUDF_STREAM_MODE="custom" LD_PRELOAD=${STREAM_IDENTIFY_LIB} ${gt} --gtest_output=xml:"$WORKSPACE/test-results/"
+        fi
     done
 
     # Test libcudf (csv, orc, and parquet) with `LIBCUDF_CUFILE_POLICY=KVIKIO`
diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
index c23f558f071..52dc22b6c49 100755
--- a/ci/release/update-version.sh
+++ b/ci/release/update-version.sh
@@ -34,6 +34,9 @@ function sed_runner() {
 # cpp update
 sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' cpp/CMakeLists.txt
 
+# cpp stream testing update
+sed_runner 's/'"VERSION ${CURRENT_SHORT_TAG}.*"'/'"VERSION ${NEXT_FULL_TAG}"'/g' cpp/tests/utilities/identify_stream_usage/CMakeLists.txt
+
 # Python update
 sed_runner 's/'"cudf_version .*)"'/'"cudf_version ${NEXT_FULL_TAG})"'/g' python/cudf/CMakeLists.txt
 
diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml
index ccb0d685062..739c5409ca4 100644
--- a/conda/recipes/libcudf/meta.yaml
+++ b/conda/recipes/libcudf/meta.yaml
@@ -113,6 +113,7 @@ outputs:
         - test -f $PREFIX/include/cudf/detail/transpose.hpp
         - test -f $PREFIX/include/cudf/detail/unary.hpp
         - test -f $PREFIX/include/cudf/detail/utilities/alignment.hpp
+        - test -f $PREFIX/include/cudf/detail/utilities/default_stream.hpp
         - test -f $PREFIX/include/cudf/detail/utilities/linked_column.hpp
         - test -f $PREFIX/include/cudf/detail/utilities/int_fastdiv.h
         - test -f $PREFIX/include/cudf/detail/utilities/integer_utils.hpp
@@ -275,6 +276,7 @@ outputs:
         - test -f $PREFIX/include/cudf_test/file_utilities.hpp
         - test -f $PREFIX/include/cudf_test/io_metadata_utilities.hpp
         - test -f $PREFIX/include/cudf_test/iterator_utilities.hpp
+        - test -f $PREFIX/include/cudf_test/stream_checking_resource_adapter.hpp
         - test -f $PREFIX/include/cudf_test/table_utilities.hpp
         - test -f $PREFIX/include/cudf_test/timestamp_utilities.cuh
         - test -f $PREFIX/include/cudf_test/type_list_utilities.hpp
diff --git a/cpp/benchmarks/column/concatenate.cpp b/cpp/benchmarks/column/concatenate.cpp
index 99aa414fae3..3260159b409 100644
--- a/cpp/benchmarks/column/concatenate.cpp
+++ b/cpp/benchmarks/column/concatenate.cpp
@@ -49,7 +49,7 @@ static void BM_concatenate(benchmark::State& state)
   CUDF_CHECK_CUDA(0);
 
   for (auto _ : state) {
-    cuda_event_timer raii(state, true, cudf::default_stream_value);
+    cuda_event_timer raii(state, true, cudf::get_default_stream());
     auto result = cudf::concatenate(column_views);
   }
 
@@ -91,7 +91,7 @@ static void BM_concatenate_tables(benchmark::State& state)
   CUDF_CHECK_CUDA(0);
 
   for (auto _ : state) {
-    cuda_event_timer raii(state, true, cudf::default_stream_value);
+    cuda_event_timer raii(state, true, cudf::get_default_stream());
     auto result = cudf::concatenate(table_views);
   }
 
@@ -150,7 +150,7 @@ static void BM_concatenate_strings(benchmark::State& state)
   CUDF_CHECK_CUDA(0);
 
   for (auto _ : state) {
-    cuda_event_timer raii(state, true, cudf::default_stream_value);
+    cuda_event_timer raii(state, true, cudf::get_default_stream());
     auto result = cudf::concatenate(column_views);
   }
 
diff --git a/cpp/benchmarks/common/generate_input.cu b/cpp/benchmarks/common/generate_input.cu
index 2bcdaa6760c..50adab71200 100644
--- a/cpp/benchmarks/common/generate_input.cu
+++ b/cpp/benchmarks/common/generate_input.cu
@@ -207,7 +207,7 @@ struct random_value_fn<T, std::enable_if_t<cudf::is_chrono<T>()>> {
     } else {
       // Don't need a random seconds generator for sub-second intervals
       seconds_gen = [range_s](thrust::minstd_rand&, size_t size) {
-        rmm::device_uvector<int64_t> result(size, cudf::default_stream_value);
+        rmm::device_uvector<int64_t> result(size, cudf::get_default_stream());
         thrust::fill(thrust::device, result.begin(), result.end(), range_s.second.count());
         return result;
       };
@@ -225,7 +225,7 @@ struct random_value_fn<T, std::enable_if_t<cudf::is_chrono<T>()>> {
   {
     auto const sec = seconds_gen(engine, size);
     auto const ns  = nanoseconds_gen(engine, size);
-    rmm::device_uvector<T> result(size, cudf::default_stream_value);
+    rmm::device_uvector<T> result(size, cudf::get_default_stream());
     thrust::transform(
       thrust::device,
       sec.begin(),
@@ -307,7 +307,7 @@ struct random_value_fn<T, typename std::enable_if_t<std::is_same_v<T, bool>>> {
   random_value_fn(distribution_params<bool> const& desc)
     : dist{[valid_prob = desc.probability_true](thrust::minstd_rand& engine,
                                                 size_t size) -> rmm::device_uvector<bool> {
-        rmm::device_uvector<bool> result(size, cudf::default_stream_value);
+        rmm::device_uvector<bool> result(size, cudf::get_default_stream());
         thrust::tabulate(
           thrust::device, result.begin(), result.end(), bool_generator(engine, valid_prob));
         return result;
@@ -359,7 +359,7 @@ rmm::device_uvector<cudf::size_type> sample_indices_with_run_length(cudf::size_t
         return samples_indices[sample_idx];
       });
     rmm::device_uvector<cudf::size_type> repeated_sample_indices(num_rows,
-                                                                 cudf::default_stream_value);
+                                                                 cudf::get_default_stream());
     thrust::copy(thrust::device,
                  avg_repeated_sample_indices_iterator,
                  avg_repeated_sample_indices_iterator + num_rows,
@@ -401,8 +401,8 @@ std::unique_ptr<cudf::column> create_random_column(data_profile const& profile,
 
   // Distribution for picking elements from the array of samples
   auto const avg_run_len = profile.get_avg_run_length();
-  rmm::device_uvector<DeviceType> data(0, cudf::default_stream_value);
-  rmm::device_uvector<bool> null_mask(0, cudf::default_stream_value);
+  rmm::device_uvector<DeviceType> data(0, cudf::get_default_stream());
+  rmm::device_uvector<bool> null_mask(0, cudf::get_default_stream());
 
   if (profile.get_cardinality() == 0 and avg_run_len == 1) {
     data      = value_dist(engine, num_rows);
@@ -418,8 +418,8 @@ std::unique_ptr<cudf::column> create_random_column(data_profile const& profile,
     // generate n samples and gather.
     auto const sample_indices =
       sample_indices_with_run_length(avg_run_len, cardinality, num_rows, engine);
-    data      = rmm::device_uvector<DeviceType>(num_rows, cudf::default_stream_value);
-    null_mask = rmm::device_uvector<bool>(num_rows, cudf::default_stream_value);
+    data      = rmm::device_uvector<DeviceType>(num_rows, cudf::get_default_stream());
+    null_mask = rmm::device_uvector<bool>(num_rows, cudf::get_default_stream());
     thrust::gather(
       thrust::device, sample_indices.begin(), sample_indices.end(), samples.begin(), data.begin());
     thrust::gather(thrust::device,
@@ -498,12 +498,12 @@ std::unique_ptr<cudf::column> create_random_utf8_string_column(data_profile cons
   auto valid_lengths = thrust::make_transform_iterator(
     thrust::make_zip_iterator(thrust::make_tuple(lengths.begin(), null_mask.begin())),
     valid_or_zero{});
-  rmm::device_uvector<cudf::size_type> offsets(num_rows + 1, cudf::default_stream_value);
+  rmm::device_uvector<cudf::size_type> offsets(num_rows + 1, cudf::get_default_stream());
   thrust::exclusive_scan(
     thrust::device, valid_lengths, valid_lengths + lengths.size(), offsets.begin());
   // offfsets are ready.
   auto chars_length = *thrust::device_pointer_cast(offsets.end() - 1);
-  rmm::device_uvector<char> chars(chars_length, cudf::default_stream_value);
+  rmm::device_uvector<char> chars(chars_length, cudf::get_default_stream());
   thrust::for_each_n(thrust::device,
                      thrust::make_zip_iterator(offsets.begin(), offsets.begin() + 1),
                      num_rows,
diff --git a/cpp/benchmarks/common/random_distribution_factory.cuh b/cpp/benchmarks/common/random_distribution_factory.cuh
index 3cfab858793..36b968c6010 100644
--- a/cpp/benchmarks/common/random_distribution_factory.cuh
+++ b/cpp/benchmarks/common/random_distribution_factory.cuh
@@ -148,7 +148,7 @@ distribution_fn<T> make_distribution(distribution_id dist_id, T lower_bound, T u
     case distribution_id::NORMAL:
       return [lower_bound, upper_bound, dist = make_normal_dist(lower_bound, upper_bound)](
                thrust::minstd_rand& engine, size_t size) -> rmm::device_uvector<T> {
-        rmm::device_uvector<T> result(size, cudf::default_stream_value);
+        rmm::device_uvector<T> result(size, cudf::get_default_stream());
         thrust::tabulate(thrust::device,
                          result.begin(),
                          result.end(),
@@ -158,7 +158,7 @@ distribution_fn<T> make_distribution(distribution_id dist_id, T lower_bound, T u
     case distribution_id::UNIFORM:
       return [lower_bound, upper_bound, dist = make_uniform_dist(lower_bound, upper_bound)](
                thrust::minstd_rand& engine, size_t size) -> rmm::device_uvector<T> {
-        rmm::device_uvector<T> result(size, cudf::default_stream_value);
+        rmm::device_uvector<T> result(size, cudf::get_default_stream());
         thrust::tabulate(thrust::device,
                          result.begin(),
                          result.end(),
@@ -169,7 +169,7 @@ distribution_fn<T> make_distribution(distribution_id dist_id, T lower_bound, T u
       // kind of exponential distribution from lower_bound to upper_bound.
       return [lower_bound, upper_bound, dist = geometric_distribution<T>(lower_bound, upper_bound)](
                thrust::minstd_rand& engine, size_t size) -> rmm::device_uvector<T> {
-        rmm::device_uvector<T> result(size, cudf::default_stream_value);
+        rmm::device_uvector<T> result(size, cudf::get_default_stream());
         thrust::tabulate(thrust::device,
                          result.begin(),
                          result.end(),
diff --git a/cpp/benchmarks/copying/copy_if_else.cpp b/cpp/benchmarks/copying/copy_if_else.cpp
index 82f4e15ecb0..9a153a7094c 100644
--- a/cpp/benchmarks/copying/copy_if_else.cpp
+++ b/cpp/benchmarks/copying/copy_if_else.cpp
@@ -45,7 +45,7 @@ static void BM_copy_if_else(benchmark::State& state, bool nulls)
   cudf::column_view lhs(input->view().column(0));
 
   for (auto _ : state) {
-    cuda_event_timer raii(state, true, cudf::default_stream_value);
+    cuda_event_timer raii(state, true, cudf::get_default_stream());
     cudf::copy_if_else(lhs, rhs, decision);
   }
 }
diff --git a/cpp/benchmarks/copying/shift.cu b/cpp/benchmarks/copying/shift.cu
index a849b7da58b..957313134b3 100644
--- a/cpp/benchmarks/copying/shift.cu
+++ b/cpp/benchmarks/copying/shift.cu
@@ -24,7 +24,7 @@
 template <typename T, typename ScalarType = cudf::scalar_type_t<T>>
 std::unique_ptr<cudf::scalar> make_scalar(
   T value                             = 0,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   auto s = new ScalarType(value, true, stream, mr);
diff --git a/cpp/benchmarks/groupby/group_max.cpp b/cpp/benchmarks/groupby/group_max.cpp
index 8454d1afee6..4956cce0daf 100644
--- a/cpp/benchmarks/groupby/group_max.cpp
+++ b/cpp/benchmarks/groupby/group_max.cpp
@@ -52,7 +52,7 @@ void bench_groupby_max(nvbench::state& state, nvbench::type_list<Type>)
   requests[0].values = vals->view();
   requests[0].aggregations.push_back(cudf::make_max_aggregation<cudf::groupby_aggregation>());
 
-  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   state.exec(nvbench::exec_tag::sync,
              [&](nvbench::launch& launch) { auto const result = gb_obj.aggregate(requests); });
 }
diff --git a/cpp/benchmarks/groupby/group_nunique.cpp b/cpp/benchmarks/groupby/group_nunique.cpp
index 1f95b5d5899..05698c04058 100644
--- a/cpp/benchmarks/groupby/group_nunique.cpp
+++ b/cpp/benchmarks/groupby/group_nunique.cpp
@@ -65,7 +65,7 @@ void bench_groupby_nunique(nvbench::state& state, nvbench::type_list<Type>)
   auto const requests = make_aggregation_request_vector(
     *vals, cudf::make_nunique_aggregation<cudf::groupby_aggregation>());
 
-  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   state.exec(nvbench::exec_tag::sync,
              [&](nvbench::launch& launch) { auto const result = gb_obj.aggregate(requests); });
 }
diff --git a/cpp/benchmarks/groupby/group_struct_keys.cpp b/cpp/benchmarks/groupby/group_struct_keys.cpp
index 227a4d5259a..cc6f0faaf41 100644
--- a/cpp/benchmarks/groupby/group_struct_keys.cpp
+++ b/cpp/benchmarks/groupby/group_struct_keys.cpp
@@ -83,7 +83,7 @@ void bench_groupby_struct_keys(nvbench::state& state)
   requests[0].aggregations.push_back(cudf::make_min_aggregation<cudf::groupby_aggregation>());
 
   // Set up nvbench default stream
-  auto stream = cudf::default_stream_value;
+  auto stream = cudf::get_default_stream();
   state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
 
   state.exec(nvbench::exec_tag::sync,
diff --git a/cpp/benchmarks/hashing/hash.cpp b/cpp/benchmarks/hashing/hash.cpp
index e997bf296c5..1053c2e4694 100644
--- a/cpp/benchmarks/hashing/hash.cpp
+++ b/cpp/benchmarks/hashing/hash.cpp
@@ -35,7 +35,7 @@ static void BM_hash(benchmark::State& state, cudf::hash_id hid, contains_nulls h
     data->get_column(0).set_null_mask(rmm::device_buffer{}, 0);
 
   for (auto _ : state) {
-    cuda_event_timer raii(state, true, cudf::default_stream_value);
+    cuda_event_timer raii(state, true, cudf::get_default_stream());
     cudf::hash(data->view(), hid);
   }
 }
diff --git a/cpp/benchmarks/io/csv/csv_reader_input.cpp b/cpp/benchmarks/io/csv/csv_reader_input.cpp
index 4f895e13f1b..27fea856332 100644
--- a/cpp/benchmarks/io/csv/csv_reader_input.cpp
+++ b/cpp/benchmarks/io/csv/csv_reader_input.cpp
@@ -47,7 +47,7 @@ void csv_read_common(DataType const& data_types,
     cudf::io::csv_reader_options::builder(source_sink.make_source_info());
 
   auto const mem_stats_logger = cudf::memory_stats_logger();  // init stats logger
-  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer,
              [&](nvbench::launch& launch, auto& timer) {
                try_drop_l3_cache();  // Drop L3 cache for accurate measurement
diff --git a/cpp/benchmarks/io/csv/csv_reader_options.cpp b/cpp/benchmarks/io/csv/csv_reader_options.cpp
index b569dc65f3d..04522c16d5c 100644
--- a/cpp/benchmarks/io/csv/csv_reader_options.cpp
+++ b/cpp/benchmarks/io/csv/csv_reader_options.cpp
@@ -66,7 +66,7 @@ void BM_csv_read_varying_options(
   size_t const chunk_size             = source_sink.size() / num_chunks;
   cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks;
   auto const mem_stats_logger         = cudf::memory_stats_logger();
-  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer,
              [&](nvbench::launch& launch, auto& timer) {
                try_drop_l3_cache();  // Drop L3 cache for accurate measurement
diff --git a/cpp/benchmarks/io/json/nested_json.cpp b/cpp/benchmarks/io/json/nested_json.cpp
index bb3e13a3a01..1fe0218bb0f 100644
--- a/cpp/benchmarks/io/json/nested_json.cpp
+++ b/cpp/benchmarks/io/json/nested_json.cpp
@@ -68,16 +68,16 @@ void BM_NESTED_JSON(nvbench::state& state)
   auto const string_size{size_type(state.get_int64("string_size"))};
   auto const default_options = cudf::io::json_reader_options{};
 
-  auto input = make_test_json_data(string_size, cudf::default_stream_value);
+  auto input = make_test_json_data(string_size, cudf::get_default_stream());
   state.add_element_count(input.size());
 
   // Run algorithm
   auto const mem_stats_logger = cudf::memory_stats_logger();
-  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
     // Allocate device-side temporary storage & run algorithm
     cudf::io::json::detail::device_parse_nested_json(
-      input, default_options, cudf::default_stream_value);
+      input, default_options, cudf::get_default_stream());
   });
 
   auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
diff --git a/cpp/benchmarks/io/orc/orc_reader_input.cpp b/cpp/benchmarks/io/orc/orc_reader_input.cpp
index 46f14cc4874..8c6f9f32f61 100644
--- a/cpp/benchmarks/io/orc/orc_reader_input.cpp
+++ b/cpp/benchmarks/io/orc/orc_reader_input.cpp
@@ -38,7 +38,7 @@ void orc_read_common(cudf::io::orc_writer_options const& opts,
     cudf::io::orc_reader_options::builder(source_sink.make_source_info());
 
   auto mem_stats_logger = cudf::memory_stats_logger();  // init stats logger
-  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer,
              [&](nvbench::launch& launch, auto& timer) {
                try_drop_l3_cache();
diff --git a/cpp/benchmarks/io/orc/orc_reader_options.cpp b/cpp/benchmarks/io/orc/orc_reader_options.cpp
index da64fdcac3a..6ca7a494642 100644
--- a/cpp/benchmarks/io/orc/orc_reader_options.cpp
+++ b/cpp/benchmarks/io/orc/orc_reader_options.cpp
@@ -83,7 +83,7 @@ void BM_orc_read_varying_options(nvbench::state& state,
   cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks;
 
   auto mem_stats_logger = cudf::memory_stats_logger();
-  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   state.exec(
     nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch& launch, auto& timer) {
       try_drop_l3_cache();
diff --git a/cpp/benchmarks/io/orc/orc_writer.cpp b/cpp/benchmarks/io/orc/orc_writer.cpp
index ddf699b0eaa..21d903d42ae 100644
--- a/cpp/benchmarks/io/orc/orc_writer.cpp
+++ b/cpp/benchmarks/io/orc/orc_writer.cpp
@@ -61,7 +61,7 @@ void BM_orc_write_encode(nvbench::state& state, nvbench::type_list<nvbench::enum
   std::size_t encoded_file_size = 0;
 
   auto mem_stats_logger = cudf::memory_stats_logger();
-  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   state.exec(nvbench::exec_tag::timer | nvbench::exec_tag::sync,
              [&](nvbench::launch& launch, auto& timer) {
                cuio_source_sink_pair source_sink(sink_type);
@@ -112,7 +112,7 @@ void BM_orc_write_io_compression(
   std::size_t encoded_file_size = 0;
 
   auto mem_stats_logger = cudf::memory_stats_logger();
-  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   state.exec(nvbench::exec_tag::timer | nvbench::exec_tag::sync,
              [&](nvbench::launch& launch, auto& timer) {
                cuio_source_sink_pair source_sink(sink_type);
@@ -157,7 +157,7 @@ void BM_orc_write_statistics(
   std::size_t encoded_file_size = 0;
 
   auto mem_stats_logger = cudf::memory_stats_logger();
-  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   state.exec(nvbench::exec_tag::timer | nvbench::exec_tag::sync,
              [&](nvbench::launch& launch, auto& timer) {
                cuio_source_sink_pair source_sink(io_type::FILEPATH);
diff --git a/cpp/benchmarks/io/orc/orc_writer_chunks.cpp b/cpp/benchmarks/io/orc/orc_writer_chunks.cpp
index daf5e247a02..494b0d0d98e 100644
--- a/cpp/benchmarks/io/orc/orc_writer_chunks.cpp
+++ b/cpp/benchmarks/io/orc/orc_writer_chunks.cpp
@@ -58,7 +58,7 @@ void nvbench_orc_write(nvbench::state& state)
 
   size_t encoded_file_size = 0;
 
-  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   state.exec(nvbench::exec_tag::timer | nvbench::exec_tag::sync,
              [&](nvbench::launch& launch, auto& timer) {
                cuio_source_sink_pair source_sink(io_type::VOID);
@@ -112,7 +112,7 @@ void nvbench_orc_chunked_write(nvbench::state& state)
 
   size_t encoded_file_size = 0;
 
-  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   state.exec(
     nvbench::exec_tag::timer | nvbench::exec_tag::sync, [&](nvbench::launch& launch, auto& timer) {
       cuio_source_sink_pair source_sink(io_type::VOID);
diff --git a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp
index 6477f611421..761cbeb62f8 100644
--- a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp
+++ b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp
@@ -38,7 +38,7 @@ void parquet_read_common(cudf::io::parquet_writer_options const& write_opts,
     cudf::io::parquet_reader_options::builder(source_sink.make_source_info());
 
   auto mem_stats_logger = cudf::memory_stats_logger();
-  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer,
              [&](nvbench::launch& launch, auto& timer) {
                try_drop_l3_cache();
diff --git a/cpp/benchmarks/io/parquet/parquet_reader_options.cpp b/cpp/benchmarks/io/parquet/parquet_reader_options.cpp
index 3c1e41c89b8..52121859f13 100644
--- a/cpp/benchmarks/io/parquet/parquet_reader_options.cpp
+++ b/cpp/benchmarks/io/parquet/parquet_reader_options.cpp
@@ -86,7 +86,7 @@ void BM_parquet_read_options(nvbench::state& state,
   auto constexpr num_chunks     = 1;
 
   auto mem_stats_logger = cudf::memory_stats_logger();
-  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   state.exec(
     nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch& launch, auto& timer) {
       try_drop_l3_cache();
diff --git a/cpp/benchmarks/io/parquet/parquet_writer.cpp b/cpp/benchmarks/io/parquet/parquet_writer.cpp
index 747dd5c086c..1cb83e5b4c8 100644
--- a/cpp/benchmarks/io/parquet/parquet_writer.cpp
+++ b/cpp/benchmarks/io/parquet/parquet_writer.cpp
@@ -62,7 +62,7 @@ void BM_parq_write_encode(nvbench::state& state, nvbench::type_list<nvbench::enu
   std::size_t encoded_file_size = 0;
 
   auto const mem_stats_logger = cudf::memory_stats_logger();
-  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   state.exec(nvbench::exec_tag::timer | nvbench::exec_tag::sync,
              [&](nvbench::launch& launch, auto& timer) {
                cuio_source_sink_pair source_sink(sink_type);
@@ -114,7 +114,7 @@ void BM_parq_write_io_compression(
   std::size_t encoded_file_size = 0;
 
   auto const mem_stats_logger = cudf::memory_stats_logger();
-  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   state.exec(nvbench::exec_tag::timer | nvbench::exec_tag::sync,
              [&](nvbench::launch& launch, auto& timer) {
                cuio_source_sink_pair source_sink(sink_type);
@@ -159,7 +159,7 @@ void BM_parq_write_varying_options(
   std::size_t encoded_file_size = 0;
 
   auto mem_stats_logger = cudf::memory_stats_logger();
-  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   state.exec(nvbench::exec_tag::timer | nvbench::exec_tag::sync,
              [&](nvbench::launch& launch, auto& timer) {
                cuio_source_sink_pair source_sink(io_type::FILEPATH);
diff --git a/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp b/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp
index 6c8500a2a70..e563055194e 100644
--- a/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp
+++ b/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp
@@ -44,7 +44,7 @@ void PQ_write(nvbench::state& state)
   std::size_t encoded_file_size = 0;
   auto const mem_stats_logger   = cudf::memory_stats_logger();
 
-  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   state.exec(nvbench::exec_tag::timer | nvbench::exec_tag::sync,
              [&](nvbench::launch& launch, auto& timer) {
                cuio_source_sink_pair source_sink(io_type::VOID);
@@ -81,7 +81,7 @@ void PQ_write_chunked(nvbench::state& state)
   auto const mem_stats_logger   = cudf::memory_stats_logger();
   std::size_t encoded_file_size = 0;
 
-  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   state.exec(
     nvbench::exec_tag::timer | nvbench::exec_tag::sync, [&](nvbench::launch& launch, auto& timer) {
       cuio_source_sink_pair source_sink(io_type::VOID);
diff --git a/cpp/benchmarks/io/text/multibyte_split.cpp b/cpp/benchmarks/io/text/multibyte_split.cpp
index b7e85d8aa7e..380766fee46 100644
--- a/cpp/benchmarks/io/text/multibyte_split.cpp
+++ b/cpp/benchmarks/io/text/multibyte_split.cpp
@@ -142,7 +142,7 @@ static void bench_multibyte_split(nvbench::state& state,
       source_type == data_chunk_source_type::file_bgzip) {
     host_input = cudf::detail::make_std_vector_sync<char>(
       {device_input.data(), static_cast<std::size_t>(device_input.size())},
-      cudf::default_stream_value);
+      cudf::get_default_stream());
   }
   if (source_type == data_chunk_source_type::host_pinned) {
     host_pinned_input.resize(static_cast<std::size_t>(device_input.size()));
@@ -184,7 +184,7 @@ static void bench_multibyte_split(nvbench::state& state,
   cudf::io::text::byte_range_info range{range_offset, range_size};
   std::unique_ptr<cudf::column> output;
 
-  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
     try_drop_l3_cache();
     output = cudf::io::text::multibyte_split(*source, delim, range);
diff --git a/cpp/benchmarks/iterator/iterator.cu b/cpp/benchmarks/iterator/iterator.cu
index c121d070ca0..381cbe4824b 100644
--- a/cpp/benchmarks/iterator/iterator.cu
+++ b/cpp/benchmarks/iterator/iterator.cu
@@ -56,7 +56,7 @@ inline auto reduce_by_cub(OutputIterator result, InputIterator d_in, int num_ite
     nullptr, temp_storage_bytes, d_in, result, num_items, cudf::DeviceSum{}, init);
 
   // Allocate temporary storage
-  rmm::device_buffer d_temp_storage(temp_storage_bytes, cudf::default_stream_value);
+  rmm::device_buffer d_temp_storage(temp_storage_bytes, cudf::get_default_stream());
 
   // Run reduction
   cub::DeviceReduce::Reduce(
diff --git a/cpp/benchmarks/join/generate_input_tables.cuh b/cpp/benchmarks/join/generate_input_tables.cuh
index 31cef581f22..c606cd8b4c0 100644
--- a/cpp/benchmarks/join/generate_input_tables.cuh
+++ b/cpp/benchmarks/join/generate_input_tables.cuh
@@ -154,7 +154,7 @@ void generate_input_tables(key_type* const build_tbl,
 
   const int num_states =
     num_sms * std::max(num_blocks_init_build_tbl, num_blocks_init_probe_tbl) * block_size;
-  rmm::device_uvector<curandState> devStates(num_states, cudf::default_stream_value);
+  rmm::device_uvector<curandState> devStates(num_states, cudf::get_default_stream());
 
   init_curand<<<(num_states - 1) / block_size + 1, block_size>>>(devStates.data(), num_states);
 
diff --git a/cpp/benchmarks/join/join_common.hpp b/cpp/benchmarks/join/join_common.hpp
index 1a87c2d1158..d4fb0862506 100644
--- a/cpp/benchmarks/join/join_common.hpp
+++ b/cpp/benchmarks/join/join_common.hpp
@@ -142,7 +142,7 @@ static void BM_join(state_type& state, Join JoinFunc)
   // Benchmark the inner join operation
   if constexpr (std::is_same_v<state_type, benchmark::State> and (not is_conditional)) {
     for (auto _ : state) {
-      cuda_event_timer raii(state, true, cudf::default_stream_value);
+      cuda_event_timer raii(state, true, cudf::get_default_stream());
 
       auto result = JoinFunc(probe_table.select(columns_to_join),
                              build_table.select(columns_to_join),
@@ -168,7 +168,7 @@ static void BM_join(state_type& state, Join JoinFunc)
       cudf::ast::operation(cudf::ast::ast_operator::EQUAL, col_ref_left_0, col_ref_right_0);
 
     for (auto _ : state) {
-      cuda_event_timer raii(state, true, cudf::default_stream_value);
+      cuda_event_timer raii(state, true, cudf::get_default_stream());
 
       auto result =
         JoinFunc(probe_table, build_table, left_zero_eq_right_zero, cudf::null_equality::UNEQUAL);
diff --git a/cpp/benchmarks/lists/copying/scatter_lists.cu b/cpp/benchmarks/lists/copying/scatter_lists.cu
index d86fb0578e5..02ad97fee11 100644
--- a/cpp/benchmarks/lists/copying/scatter_lists.cu
+++ b/cpp/benchmarks/lists/copying/scatter_lists.cu
@@ -40,7 +40,7 @@ class ScatterLists : public cudf::benchmark {
 template <class TypeParam, bool coalesce>
 void BM_lists_scatter(::benchmark::State& state)
 {
-  auto stream = cudf::default_stream_value;
+  auto stream = cudf::get_default_stream();
   auto mr     = rmm::mr::get_current_device_resource();
 
   const size_type base_size{(size_type)state.range(0)};
diff --git a/cpp/benchmarks/quantiles/quantiles.cpp b/cpp/benchmarks/quantiles/quantiles.cpp
index 7c0a88584f8..599cff2bcda 100644
--- a/cpp/benchmarks/quantiles/quantiles.cpp
+++ b/cpp/benchmarks/quantiles/quantiles.cpp
@@ -50,7 +50,7 @@ static void BM_quantiles(benchmark::State& state, bool nulls)
     thrust::seq, q.begin(), q.end(), [n_quantiles](auto i) { return i * (1.0f / n_quantiles); });
 
   for (auto _ : state) {
-    cuda_event_timer raii(state, true, cudf::default_stream_value);
+    cuda_event_timer raii(state, true, cudf::get_default_stream());
 
     auto result = cudf::quantiles(input, q);
     // auto result = (stable) ? cudf::stable_sorted_order(input) : cudf::sorted_order(input);
diff --git a/cpp/benchmarks/reduction/segment_reduce.cu b/cpp/benchmarks/reduction/segment_reduce.cu
index d2c15c87c2b..e063adb25f9 100644
--- a/cpp/benchmarks/reduction/segment_reduce.cu
+++ b/cpp/benchmarks/reduction/segment_reduce.cu
@@ -109,7 +109,7 @@ void BM_Simple_Segmented_Reduction(nvbench::state& state,
   auto const input_view  = input->view();
   auto const offset_span = cudf::device_span<cudf::size_type>{offsets};
 
-  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   state.exec(
     nvbench::exec_tag::sync, [input_view, output_type, offset_span, &agg](nvbench::launch& launch) {
       segmented_reduce(input_view, offset_span, *agg, output_type, cudf::null_policy::INCLUDE);
diff --git a/cpp/benchmarks/sort/rank.cpp b/cpp/benchmarks/sort/rank.cpp
index 66277443800..2c26f4fa15d 100644
--- a/cpp/benchmarks/sort/rank.cpp
+++ b/cpp/benchmarks/sort/rank.cpp
@@ -37,7 +37,7 @@ static void BM_rank(benchmark::State& state, bool nulls)
   auto keys = create_random_column(cudf::type_to_id<Type>(), row_count{n_rows}, profile);
 
   for (auto _ : state) {
-    cuda_event_timer raii(state, true, cudf::default_stream_value);
+    cuda_event_timer raii(state, true, cudf::get_default_stream());
 
     auto result = cudf::rank(keys->view(),
                              cudf::rank_method::FIRST,
diff --git a/cpp/benchmarks/sort/sort.cpp b/cpp/benchmarks/sort/sort.cpp
index 13502ce0959..304bac06632 100644
--- a/cpp/benchmarks/sort/sort.cpp
+++ b/cpp/benchmarks/sort/sort.cpp
@@ -42,7 +42,7 @@ static void BM_sort(benchmark::State& state, bool nulls)
   cudf::table_view input{*input_table};
 
   for (auto _ : state) {
-    cuda_event_timer raii(state, true, cudf::default_stream_value);
+    cuda_event_timer raii(state, true, cudf::get_default_stream());
 
     auto result = (stable) ? cudf::stable_sorted_order(input) : cudf::sorted_order(input);
   }
diff --git a/cpp/benchmarks/sort/sort_strings.cpp b/cpp/benchmarks/sort/sort_strings.cpp
index 701b392f80b..572c05d69cb 100644
--- a/cpp/benchmarks/sort/sort_strings.cpp
+++ b/cpp/benchmarks/sort/sort_strings.cpp
@@ -32,7 +32,7 @@ static void BM_sort(benchmark::State& state)
   auto const table = create_random_table({cudf::type_id::STRING}, row_count{n_rows});
 
   for (auto _ : state) {
-    cuda_event_timer raii(state, true, cudf::default_stream_value);
+    cuda_event_timer raii(state, true, cudf::get_default_stream());
     cudf::sort(table->view());
   }
 }
diff --git a/cpp/benchmarks/stream_compaction/distinct.cpp b/cpp/benchmarks/stream_compaction/distinct.cpp
index 23960b24b89..512554ff1bc 100644
--- a/cpp/benchmarks/stream_compaction/distinct.cpp
+++ b/cpp/benchmarks/stream_compaction/distinct.cpp
@@ -41,7 +41,7 @@ void nvbench_distinct(nvbench::state& state, nvbench::type_list<Type>)
   auto input_column = source_column->view();
   auto input_table  = cudf::table_view({input_column, input_column, input_column, input_column});
 
-  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
     auto result = cudf::distinct(input_table,
                                  {0},
@@ -83,7 +83,7 @@ void nvbench_distinct_list(nvbench::state& state, nvbench::type_list<Type>)
   auto const table = create_random_table(
     {dtype}, table_size_bytes{static_cast<size_t>(size)}, data_profile{builder}, 0);
 
-  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
     auto result = cudf::distinct(*table,
                                  {0},
diff --git a/cpp/benchmarks/stream_compaction/unique.cpp b/cpp/benchmarks/stream_compaction/unique.cpp
index bcf9628b19f..652d55fb8ce 100644
--- a/cpp/benchmarks/stream_compaction/unique.cpp
+++ b/cpp/benchmarks/stream_compaction/unique.cpp
@@ -62,7 +62,7 @@ void nvbench_unique(nvbench::state& state, nvbench::type_list<Type, nvbench::enu
   auto input_column = source_column->view();
   auto input_table  = cudf::table_view({input_column, input_column, input_column, input_column});
 
-  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
     auto result = cudf::unique(input_table, {0}, Keep, cudf::null_equality::EQUAL);
   });
diff --git a/cpp/benchmarks/string/case.cpp b/cpp/benchmarks/string/case.cpp
index 1c43fa0f077..72b6fcaff0e 100644
--- a/cpp/benchmarks/string/case.cpp
+++ b/cpp/benchmarks/string/case.cpp
@@ -32,7 +32,7 @@ static void BM_case(benchmark::State& state)
   cudf::strings_column_view input(column->view());
 
   for (auto _ : state) {
-    cuda_event_timer raii(state, true, cudf::default_stream_value);
+    cuda_event_timer raii(state, true, cudf::get_default_stream());
     cudf::strings::to_lower(input);
   }
 
diff --git a/cpp/benchmarks/string/combine.cpp b/cpp/benchmarks/string/combine.cpp
index a8d0224916b..46bcda9ae92 100644
--- a/cpp/benchmarks/string/combine.cpp
+++ b/cpp/benchmarks/string/combine.cpp
@@ -41,7 +41,7 @@ static void BM_combine(benchmark::State& state)
   cudf::string_scalar separator("+");
 
   for (auto _ : state) {
-    cuda_event_timer raii(state, true, cudf::default_stream_value);
+    cuda_event_timer raii(state, true, cudf::get_default_stream());
     cudf::strings::concatenate(table->view(), separator);
   }
 
diff --git a/cpp/benchmarks/string/contains.cpp b/cpp/benchmarks/string/contains.cpp
index fd04d599e5e..f7f394ea048 100644
--- a/cpp/benchmarks/string/contains.cpp
+++ b/cpp/benchmarks/string/contains.cpp
@@ -85,7 +85,7 @@ static void BM_contains(benchmark::State& state, contains_type ct)
   auto pattern = patterns[pattern_index];
 
   for (auto _ : state) {
-    cuda_event_timer raii(state, true, cudf::default_stream_value);
+    cuda_event_timer raii(state, true, cudf::get_default_stream());
     switch (ct) {
       case contains_type::contains:  // contains_re and matches_re use the same main logic
         cudf::strings::contains_re(input, pattern);
diff --git a/cpp/benchmarks/string/copy.cu b/cpp/benchmarks/string/copy.cu
index 318d2d524a3..669b12aa56b 100644
--- a/cpp/benchmarks/string/copy.cu
+++ b/cpp/benchmarks/string/copy.cu
@@ -58,7 +58,7 @@ static void BM_copy(benchmark::State& state, copy_type ct)
                        thrust::default_random_engine());
 
   for (auto _ : state) {
-    cuda_event_timer raii(state, true, cudf::default_stream_value);
+    cuda_event_timer raii(state, true, cudf::get_default_stream());
     switch (ct) {
       case gather: cudf::gather(source->view(), index_map); break;
       case scatter: cudf::scatter(source->view(), index_map, target->view()); break;
diff --git a/cpp/benchmarks/string/factory.cu b/cpp/benchmarks/string/factory.cu
index 0e937b91e98..b75de16e901 100644
--- a/cpp/benchmarks/string/factory.cu
+++ b/cpp/benchmarks/string/factory.cu
@@ -55,7 +55,7 @@ static void BM_factory(benchmark::State& state)
     cudf::type_id::STRING, distribution_id::NORMAL, 0, max_str_length);
   auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile);
   auto d_column     = cudf::column_device_view::create(column->view());
-  rmm::device_uvector<string_pair> pairs(d_column->size(), cudf::default_stream_value);
+  rmm::device_uvector<string_pair> pairs(d_column->size(), cudf::get_default_stream());
   thrust::transform(thrust::device,
                     d_column->pair_begin<cudf::string_view, true>(),
                     d_column->pair_end<cudf::string_view, true>(),
@@ -63,7 +63,7 @@ static void BM_factory(benchmark::State& state)
                     string_view_to_pair{});
 
   for (auto _ : state) {
-    cuda_event_timer raii(state, true, cudf::default_stream_value);
+    cuda_event_timer raii(state, true, cudf::get_default_stream());
     cudf::make_strings_column(pairs);
   }
 
diff --git a/cpp/benchmarks/string/filter.cpp b/cpp/benchmarks/string/filter.cpp
index 4001fef5da6..f07c11ee6ca 100644
--- a/cpp/benchmarks/string/filter.cpp
+++ b/cpp/benchmarks/string/filter.cpp
@@ -49,7 +49,7 @@ static void BM_filter_chars(benchmark::State& state, FilterAPI api)
     {cudf::char_utf8{'a'}, cudf::char_utf8{'c'}}};
 
   for (auto _ : state) {
-    cuda_event_timer raii(state, true, cudf::default_stream_value);
+    cuda_event_timer raii(state, true, cudf::get_default_stream());
     switch (api) {
       case filter: cudf::strings::filter_characters_of_type(input, types); break;
       case filter_chars: cudf::strings::filter_characters(input, filter_table); break;
diff --git a/cpp/benchmarks/string/find.cpp b/cpp/benchmarks/string/find.cpp
index 62c76d18e1a..4ff3b59a491 100644
--- a/cpp/benchmarks/string/find.cpp
+++ b/cpp/benchmarks/string/find.cpp
@@ -45,7 +45,7 @@ static void BM_find_scalar(benchmark::State& state, FindAPI find_api)
   cudf::test::strings_column_wrapper targets({"+", "-"});
 
   for (auto _ : state) {
-    cuda_event_timer raii(state, true, cudf::default_stream_value);
+    cuda_event_timer raii(state, true, cudf::get_default_stream());
     switch (find_api) {
       case find: cudf::strings::find(input, target); break;
       case find_multi:
diff --git a/cpp/benchmarks/string/like.cpp b/cpp/benchmarks/string/like.cpp
index f6649b186a4..de7382f5a75 100644
--- a/cpp/benchmarks/string/like.cpp
+++ b/cpp/benchmarks/string/like.cpp
@@ -81,7 +81,7 @@ static void bench_like(nvbench::state& state)
   // This pattern forces reading the entire target string (when matched expected)
   auto pattern = std::string("% 5W4_");  // regex equivalent: ".* 5W4."
 
-  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::default_stream_value.value()));
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   // gather some throughput statistics as well
   auto chars_size = input.chars_size();
   state.add_element_count(chars_size, "chars_size");           // number of bytes;
diff --git a/cpp/benchmarks/string/repeat_strings.cpp b/cpp/benchmarks/string/repeat_strings.cpp
index db02fec13c2..1844e93bc53 100644
--- a/cpp/benchmarks/string/repeat_strings.cpp
+++ b/cpp/benchmarks/string/repeat_strings.cpp
@@ -55,7 +55,7 @@ static void BM_repeat_strings_scalar_times(benchmark::State& state)
   auto const strings_col    = cudf::strings_column_view(table->view().column(0));
 
   for ([[maybe_unused]] auto _ : state) {
-    [[maybe_unused]] cuda_event_timer raii(state, true, cudf::default_stream_value);
+    [[maybe_unused]] cuda_event_timer raii(state, true, cudf::get_default_stream());
     cudf::strings::repeat_strings(strings_col, default_repeat_times);
   }
 
@@ -71,7 +71,7 @@ static void BM_repeat_strings_column_times(benchmark::State& state)
   auto const repeat_times_col = table->view().column(1);
 
   for ([[maybe_unused]] auto _ : state) {
-    [[maybe_unused]] cuda_event_timer raii(state, true, cudf::default_stream_value);
+    [[maybe_unused]] cuda_event_timer raii(state, true, cudf::get_default_stream());
     cudf::strings::repeat_strings(strings_col, repeat_times_col);
   }
 
@@ -88,7 +88,7 @@ static void BM_compute_output_strings_sizes(benchmark::State& state)
   auto const repeat_times_col = table->view().column(1);
 
   for ([[maybe_unused]] auto _ : state) {
-    [[maybe_unused]] cuda_event_timer raii(state, true, cudf::default_stream_value);
+    [[maybe_unused]] cuda_event_timer raii(state, true, cudf::get_default_stream());
     cudf::strings::repeat_strings_output_sizes(strings_col, repeat_times_col);
   }
 
@@ -107,7 +107,7 @@ static void BM_repeat_strings_column_times_precomputed_sizes(benchmark::State& s
     cudf::strings::repeat_strings_output_sizes(strings_col, repeat_times_col);
 
   for ([[maybe_unused]] auto _ : state) {
-    [[maybe_unused]] cuda_event_timer raii(state, true, cudf::default_stream_value);
+    [[maybe_unused]] cuda_event_timer raii(state, true, cudf::get_default_stream());
     cudf::strings::repeat_strings(strings_col, repeat_times_col, *sizes);
   }
 
diff --git a/cpp/benchmarks/string/replace.cpp b/cpp/benchmarks/string/replace.cpp
index e25bf679dbc..b25af14ec2a 100644
--- a/cpp/benchmarks/string/replace.cpp
+++ b/cpp/benchmarks/string/replace.cpp
@@ -48,7 +48,7 @@ static void BM_replace(benchmark::State& state, replace_type rt)
   cudf::test::strings_column_wrapper repls({"", ""});
 
   for (auto _ : state) {
-    cuda_event_timer raii(state, true, cudf::default_stream_value);
+    cuda_event_timer raii(state, true, cudf::get_default_stream());
     switch (rt) {
       case scalar: cudf::strings::replace(input, target, repl); break;
       case slice: cudf::strings::replace_slice(input, repl, 1, 10); break;
diff --git a/cpp/benchmarks/string/replace_re.cpp b/cpp/benchmarks/string/replace_re.cpp
index f8b03daa338..7e9d6036750 100644
--- a/cpp/benchmarks/string/replace_re.cpp
+++ b/cpp/benchmarks/string/replace_re.cpp
@@ -42,7 +42,7 @@ static void BM_replace(benchmark::State& state, replace_type rt)
   cudf::test::strings_column_wrapper repls({"#", ""});
 
   for (auto _ : state) {
-    cuda_event_timer raii(state, true, cudf::default_stream_value);
+    cuda_event_timer raii(state, true, cudf::get_default_stream());
     switch (rt) {
       case replace_type::replace_re:  // contains_re and matches_re use the same main logic
         cudf::strings::replace_re(input, "\\d+");
diff --git a/cpp/benchmarks/string/split.cpp b/cpp/benchmarks/string/split.cpp
index 3a7a96b025d..0f005c462cc 100644
--- a/cpp/benchmarks/string/split.cpp
+++ b/cpp/benchmarks/string/split.cpp
@@ -43,7 +43,7 @@ static void BM_split(benchmark::State& state, split_type rt)
   cudf::string_scalar target("+");
 
   for (auto _ : state) {
-    cuda_event_timer raii(state, true, cudf::default_stream_value);
+    cuda_event_timer raii(state, true, cudf::get_default_stream());
     switch (rt) {
       case split: cudf::strings::split(input, target); break;
       case split_ws: cudf::strings::split(input); break;
diff --git a/cpp/benchmarks/string/substring.cpp b/cpp/benchmarks/string/substring.cpp
index 7ae5ad6f581..1201b240013 100644
--- a/cpp/benchmarks/string/substring.cpp
+++ b/cpp/benchmarks/string/substring.cpp
@@ -52,7 +52,7 @@ static void BM_substring(benchmark::State& state, substring_type rt)
   cudf::test::strings_column_wrapper delimiters(delim_itr, delim_itr + n_rows);
 
   for (auto _ : state) {
-    cuda_event_timer raii(state, true, cudf::default_stream_value);
+    cuda_event_timer raii(state, true, cudf::get_default_stream());
     switch (rt) {
       case position: cudf::strings::slice_strings(input, 1, max_str_length / 2); break;
       case multi_position: cudf::strings::slice_strings(input, starts, stops); break;
diff --git a/cpp/benchmarks/string/translate.cpp b/cpp/benchmarks/string/translate.cpp
index 359a3756ef2..efc2fa3154b 100644
--- a/cpp/benchmarks/string/translate.cpp
+++ b/cpp/benchmarks/string/translate.cpp
@@ -53,7 +53,7 @@ static void BM_translate(benchmark::State& state, int entry_count)
                  });
 
   for (auto _ : state) {
-    cuda_event_timer raii(state, true, cudf::default_stream_value);
+    cuda_event_timer raii(state, true, cudf::get_default_stream());
     cudf::strings::translate(input, entries);
   }
 
diff --git a/cpp/benchmarks/string/url_decode.cu b/cpp/benchmarks/string/url_decode.cu
index a884bc8b587..44681c924d0 100644
--- a/cpp/benchmarks/string/url_decode.cu
+++ b/cpp/benchmarks/string/url_decode.cu
@@ -91,7 +91,7 @@ void BM_url_decode(benchmark::State& state, int esc_seq_pct)
   auto strings_view = cudf::strings_column_view(column->view());
 
   for (auto _ : state) {
-    cuda_event_timer raii(state, true, cudf::default_stream_value);
+    cuda_event_timer raii(state, true, cudf::get_default_stream());
     auto result = cudf::strings::url_decode(strings_view);
   }
 
diff --git a/cpp/benchmarks/synchronization/synchronization.hpp b/cpp/benchmarks/synchronization/synchronization.hpp
index e5882ff1c16..ebff1ff888d 100644
--- a/cpp/benchmarks/synchronization/synchronization.hpp
+++ b/cpp/benchmarks/synchronization/synchronization.hpp
@@ -35,7 +35,7 @@
       for (auto _ : state){
 
         // default stream, could be another stream
-        rmm::cuda_stream_view stream{cudf::default_stream_value};
+        rmm::cuda_stream_view stream{cudf::get_default_stream()};
 
         // Create (Construct) an object of this class. You HAVE to pass in the
         // benchmark::State object you are using. It measures the time from its
@@ -85,7 +85,7 @@ class cuda_event_timer {
    */
   cuda_event_timer(benchmark::State& state,
                    bool flush_l2_cache,
-                   rmm::cuda_stream_view stream = cudf::default_stream_value);
+                   rmm::cuda_stream_view stream = cudf::get_default_stream());
 
   // The user must provide a benchmark::State object to set
   // the timer so we disable the default c'tor.
diff --git a/cpp/benchmarks/text/normalize.cpp b/cpp/benchmarks/text/normalize.cpp
index e5a0a1a95f4..91d873224d3 100644
--- a/cpp/benchmarks/text/normalize.cpp
+++ b/cpp/benchmarks/text/normalize.cpp
@@ -37,7 +37,7 @@ static void BM_normalize(benchmark::State& state, bool to_lower)
   cudf::strings_column_view input(column->view());
 
   for (auto _ : state) {
-    cuda_event_timer raii(state, true, cudf::default_stream_value);
+    cuda_event_timer raii(state, true, cudf::get_default_stream());
     nvtext::normalize_characters(input, to_lower);
   }
 
diff --git a/cpp/benchmarks/text/normalize_spaces.cpp b/cpp/benchmarks/text/normalize_spaces.cpp
index 414cd119575..85eaf54d4ea 100644
--- a/cpp/benchmarks/text/normalize_spaces.cpp
+++ b/cpp/benchmarks/text/normalize_spaces.cpp
@@ -38,7 +38,7 @@ static void BM_normalize(benchmark::State& state)
   cudf::strings_column_view input(column->view());
 
   for (auto _ : state) {
-    cuda_event_timer raii(state, true, cudf::default_stream_value);
+    cuda_event_timer raii(state, true, cudf::get_default_stream());
     nvtext::normalize_spaces(input);
   }
 
diff --git a/cpp/benchmarks/text/tokenize.cpp b/cpp/benchmarks/text/tokenize.cpp
index 4d8df6ae37c..4695a62f1c0 100644
--- a/cpp/benchmarks/text/tokenize.cpp
+++ b/cpp/benchmarks/text/tokenize.cpp
@@ -44,7 +44,7 @@ static void BM_tokenize(benchmark::State& state, tokenize_type tt)
   cudf::test::strings_column_wrapper delimiters({" ", "+", "-"});
 
   for (auto _ : state) {
-    cuda_event_timer raii(state, true, cudf::default_stream_value);
+    cuda_event_timer raii(state, true, cudf::get_default_stream());
     switch (tt) {
       case tokenize_type::single:
         // single whitespace delimiter
diff --git a/cpp/benchmarks/type_dispatcher/type_dispatcher.cu b/cpp/benchmarks/type_dispatcher/type_dispatcher.cu
index b1d2498f0e6..34b1e0254dd 100644
--- a/cpp/benchmarks/type_dispatcher/type_dispatcher.cu
+++ b/cpp/benchmarks/type_dispatcher/type_dispatcher.cu
@@ -188,10 +188,10 @@ void type_dispatcher_benchmark(::benchmark::State& state)
   std::vector<rmm::device_buffer> h_vec(n_cols);
   std::vector<TypeParam*> h_vec_p(n_cols);
   std::transform(h_vec.begin(), h_vec.end(), h_vec_p.begin(), [source_size](auto& col) {
-    col.resize(source_size * sizeof(TypeParam), cudf::default_stream_value);
+    col.resize(source_size * sizeof(TypeParam), cudf::get_default_stream());
     return static_cast<TypeParam*>(col.data());
   });
-  rmm::device_uvector<TypeParam*> d_vec(n_cols, cudf::default_stream_value);
+  rmm::device_uvector<TypeParam*> d_vec(n_cols, cudf::get_default_stream());
 
   if (dispatching_type == NO_DISPATCHING) {
     CUDF_CUDA_TRY(cudaMemcpy(
diff --git a/cpp/include/cudf/binaryop.hpp b/cpp/include/cudf/binaryop.hpp
index c82fd1b52a1..554a38e03e5 100644
--- a/cpp/include/cudf/binaryop.hpp
+++ b/cpp/include/cudf/binaryop.hpp
@@ -232,7 +232,7 @@ namespace binops {
 std::pair<rmm::device_buffer, size_type> scalar_col_valid_mask_and(
   column_view const& col,
   scalar const& s,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 namespace compiled {
@@ -255,7 +255,7 @@ void apply_sorting_struct_binary_op(mutable_column_view& out,
                                     bool is_lhs_scalar,
                                     bool is_rhs_scalar,
                                     binary_operator op,
-                                    rmm::cuda_stream_view stream = cudf::default_stream_value);
+                                    rmm::cuda_stream_view stream = cudf::get_default_stream());
 }  // namespace detail
 }  // namespace compiled
 }  // namespace binops
diff --git a/cpp/include/cudf/column/column.hpp b/cpp/include/cudf/column/column.hpp
index c5f6d339ae9..4f42910856f 100644
--- a/cpp/include/cudf/column/column.hpp
+++ b/cpp/include/cudf/column/column.hpp
@@ -64,7 +64,7 @@ class column {
    * @param mr Device memory resource to use for all device memory allocations
    */
   column(column const& other,
-         rmm::cuda_stream_view stream        = cudf::default_stream_value,
+         rmm::cuda_stream_view stream        = cudf::get_default_stream(),
          rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
   /**
@@ -146,7 +146,7 @@ class column {
    * @param mr Device memory resource to use for all device memory allocations
    */
   explicit column(column_view view,
-                  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+                  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
                   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
   /**
@@ -208,7 +208,7 @@ class column {
    */
   void set_null_mask(rmm::device_buffer const& new_null_mask,
                      size_type new_null_count     = UNKNOWN_NULL_COUNT,
-                     rmm::cuda_stream_view stream = cudf::default_stream_value);
+                     rmm::cuda_stream_view stream = cudf::get_default_stream());
 
   /**
    * @brief Updates the count of null elements.
diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh
index 4f9a09fb621..1361866d0aa 100644
--- a/cpp/include/cudf/column/column_device_view.cuh
+++ b/cpp/include/cudf/column/column_device_view.cuh
@@ -821,7 +821,7 @@ class alignas(16) column_device_view : public detail::column_device_view_base {
    *`source_view` available in device memory.
    */
   static std::unique_ptr<column_device_view, std::function<void(column_device_view*)>> create(
-    column_view source_view, rmm::cuda_stream_view stream = cudf::default_stream_value);
+    column_view source_view, rmm::cuda_stream_view stream = cudf::get_default_stream());
 
   /**
    * @brief Destroy the `column_device_view` object.
@@ -974,7 +974,7 @@ class alignas(16) mutable_column_device_view : public detail::column_device_view
   static std::unique_ptr<mutable_column_device_view,
                          std::function<void(mutable_column_device_view*)>>
   create(mutable_column_view source_view,
-         rmm::cuda_stream_view stream = cudf::default_stream_value);
+         rmm::cuda_stream_view stream = cudf::get_default_stream());
 
   /**
    * @brief Returns pointer to the base device memory allocation casted to
diff --git a/cpp/include/cudf/column/column_factories.hpp b/cpp/include/cudf/column/column_factories.hpp
index 5c691d866bd..85f4deecb1d 100644
--- a/cpp/include/cudf/column/column_factories.hpp
+++ b/cpp/include/cudf/column/column_factories.hpp
@@ -75,7 +75,7 @@ std::unique_ptr<column> make_numeric_column(
   data_type type,
   size_type size,
   mask_state state                    = mask_state::UNALLOCATED,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -101,7 +101,7 @@ std::unique_ptr<column> make_numeric_column(
   size_type size,
   B&& null_mask,
   size_type null_count                = cudf::UNKNOWN_NULL_COUNT,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   CUDF_EXPECTS(is_numeric(type), "Invalid, non-numeric type.");
@@ -132,7 +132,7 @@ std::unique_ptr<column> make_fixed_point_column(
   data_type type,
   size_type size,
   mask_state state                    = mask_state::UNALLOCATED,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -157,7 +157,7 @@ std::unique_ptr<column> make_fixed_point_column(
   size_type size,
   B&& null_mask,
   size_type null_count                = cudf::UNKNOWN_NULL_COUNT,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   CUDF_EXPECTS(is_fixed_point(type), "Invalid, non-fixed_point type.");
@@ -189,7 +189,7 @@ std::unique_ptr<column> make_timestamp_column(
   data_type type,
   size_type size,
   mask_state state                    = mask_state::UNALLOCATED,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -215,7 +215,7 @@ std::unique_ptr<column> make_timestamp_column(
   size_type size,
   B&& null_mask,
   size_type null_count                = cudf::UNKNOWN_NULL_COUNT,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   CUDF_EXPECTS(is_timestamp(type), "Invalid, non-timestamp type.");
@@ -247,7 +247,7 @@ std::unique_ptr<column> make_duration_column(
   data_type type,
   size_type size,
   mask_state state                    = mask_state::UNALLOCATED,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -273,7 +273,7 @@ std::unique_ptr<column> make_duration_column(
   size_type size,
   B&& null_mask,
   size_type null_count                = cudf::UNKNOWN_NULL_COUNT,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   CUDF_EXPECTS(is_duration(type), "Invalid, non-duration type.");
@@ -305,7 +305,7 @@ std::unique_ptr<column> make_fixed_width_column(
   data_type type,
   size_type size,
   mask_state state                    = mask_state::UNALLOCATED,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -331,7 +331,7 @@ std::unique_ptr<column> make_fixed_width_column(
   size_type size,
   B&& null_mask,
   size_type null_count                = cudf::UNKNOWN_NULL_COUNT,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   CUDF_EXPECTS(is_fixed_width(type), "Invalid, non-fixed-width type.");
@@ -370,7 +370,7 @@ std::unique_ptr<column> make_fixed_width_column(
  */
 std::unique_ptr<column> make_strings_column(
   cudf::device_span<thrust::pair<const char*, size_type> const> strings,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -402,7 +402,7 @@ std::unique_ptr<column> make_strings_column(
 std::unique_ptr<column> make_strings_column(
   cudf::device_span<string_view const> string_views,
   const string_view null_placeholder,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -438,7 +438,7 @@ std::unique_ptr<column> make_strings_column(
   cudf::device_span<size_type const> offsets,
   cudf::device_span<bitmask_type const> null_mask = {},
   size_type null_count                            = cudf::UNKNOWN_NULL_COUNT,
-  rmm::cuda_stream_view stream                    = cudf::default_stream_value,
+  rmm::cuda_stream_view stream                    = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr             = rmm::mr::get_current_device_resource());
 
 /**
@@ -547,7 +547,7 @@ std::unique_ptr<cudf::column> make_lists_column(
   std::unique_ptr<column> child_column,
   size_type null_count,
   rmm::device_buffer&& null_mask,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -578,7 +578,7 @@ std::unique_ptr<cudf::column> make_structs_column(
   std::vector<std::unique_ptr<column>>&& child_columns,
   size_type null_count,
   rmm::device_buffer&& null_mask,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -598,7 +598,7 @@ std::unique_ptr<cudf::column> make_structs_column(
 std::unique_ptr<column> make_column_from_scalar(
   scalar const& s,
   size_type size,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -618,7 +618,7 @@ std::unique_ptr<column> make_column_from_scalar(
 std::unique_ptr<column> make_dictionary_from_scalar(
   scalar const& s,
   size_type size,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /** @} */  // end of group
diff --git a/cpp/include/cudf/detail/binaryop.hpp b/cpp/include/cudf/detail/binaryop.hpp
index 8deac88a645..944f2eef743 100644
--- a/cpp/include/cudf/detail/binaryop.hpp
+++ b/cpp/include/cudf/detail/binaryop.hpp
@@ -35,7 +35,7 @@ std::unique_ptr<column> binary_operation(
   column_view const& rhs,
   std::string const& ptx,
   data_type output_type,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -49,7 +49,7 @@ std::unique_ptr<column> binary_operation(
   column_view const& rhs,
   binary_operator op,
   data_type output_type,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -63,7 +63,7 @@ std::unique_ptr<column> binary_operation(
   scalar const& rhs,
   binary_operator op,
   data_type output_type,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -77,7 +77,7 @@ std::unique_ptr<column> binary_operation(
   column_view const& rhs,
   binary_operator op,
   data_type output_type,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 }  // namespace detail
 }  // namespace cudf
diff --git a/cpp/include/cudf/detail/concatenate.hpp b/cpp/include/cudf/detail/concatenate.hpp
index 08a37acead2..ae5c95c4645 100644
--- a/cpp/include/cudf/detail/concatenate.hpp
+++ b/cpp/include/cudf/detail/concatenate.hpp
@@ -35,7 +35,7 @@ namespace detail {
  */
 std::unique_ptr<column> concatenate(
   host_span<column_view const> columns_to_concat,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -45,7 +45,7 @@ std::unique_ptr<column> concatenate(
  */
 std::unique_ptr<table> concatenate(
   host_span<table_view const> tables_to_concat,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/copy.hpp b/cpp/include/cudf/detail/copy.hpp
index a2cbe8c5238..23ed9090f05 100644
--- a/cpp/include/cudf/detail/copy.hpp
+++ b/cpp/include/cudf/detail/copy.hpp
@@ -77,7 +77,7 @@ ColumnView slice(ColumnView const& input, cudf::size_type begin, cudf::size_type
  */
 std::vector<column_view> slice(column_view const& input,
                                host_span<size_type const> indices,
-                               rmm::cuda_stream_view stream = cudf::default_stream_value);
+                               rmm::cuda_stream_view stream = cudf::get_default_stream());
 /**
  * @copydoc cudf::slice(column_view const&, std::initializer_list<size_type>)
  *
@@ -85,7 +85,7 @@ std::vector<column_view> slice(column_view const& input,
  */
 std::vector<column_view> slice(column_view const& input,
                                std::initializer_list<size_type> indices,
-                               rmm::cuda_stream_view stream = cudf::default_stream_value);
+                               rmm::cuda_stream_view stream = cudf::get_default_stream());
 
 /**
  * @copydoc cudf::slice(table_view const&, host_span<size_type const>)
@@ -94,7 +94,7 @@ std::vector<column_view> slice(column_view const& input,
  */
 std::vector<table_view> slice(table_view const& input,
                               host_span<size_type const> indices,
-                              rmm::cuda_stream_view stream = cudf::default_stream_value);
+                              rmm::cuda_stream_view stream = cudf::get_default_stream());
 /**
  * @copydoc cudf::slice(table_view const&, std::initializer_list<size_type>)
  *
@@ -102,7 +102,7 @@ std::vector<table_view> slice(table_view const& input,
  */
 std::vector<table_view> slice(table_view const& input,
                               std::initializer_list<size_type> indices,
-                              rmm::cuda_stream_view stream = cudf::default_stream_value);
+                              rmm::cuda_stream_view stream = cudf::get_default_stream());
 
 /**
  * @copydoc cudf::split(column_view const&, host_span<size_type const>)
@@ -111,7 +111,7 @@ std::vector<table_view> slice(table_view const& input,
  */
 std::vector<column_view> split(column_view const& input,
                                host_span<size_type const> splits,
-                               rmm::cuda_stream_view stream = cudf::default_stream_value);
+                               rmm::cuda_stream_view stream = cudf::get_default_stream());
 /**
  * @copydoc cudf::split(column_view const&, std::initializer_list<size_type>)
  *
@@ -119,7 +119,7 @@ std::vector<column_view> split(column_view const& input,
  */
 std::vector<column_view> split(column_view const& input,
                                std::initializer_list<size_type> splits,
-                               rmm::cuda_stream_view stream = cudf::default_stream_value);
+                               rmm::cuda_stream_view stream = cudf::get_default_stream());
 
 /**
  * @copydoc cudf::split(table_view const&, host_span<size_type const>)
@@ -128,7 +128,7 @@ std::vector<column_view> split(column_view const& input,
  */
 std::vector<table_view> split(table_view const& input,
                               host_span<size_type const> splits,
-                              rmm::cuda_stream_view stream = cudf::default_stream_value);
+                              rmm::cuda_stream_view stream = cudf::get_default_stream());
 /**
  * @copydoc cudf::split(table_view const&, std::initializer_list<size_type>)
  *
@@ -136,7 +136,7 @@ std::vector<table_view> split(table_view const& input,
  */
 std::vector<table_view> split(table_view const& input,
                               std::initializer_list<size_type> splits,
-                              rmm::cuda_stream_view stream = cudf::default_stream_value);
+                              rmm::cuda_stream_view stream = cudf::get_default_stream());
 
 /**
  * @copydoc cudf::shift(column_view const&,size_type,scalar const&,
@@ -148,7 +148,7 @@ std::unique_ptr<column> shift(
   column_view const& input,
   size_type offset,
   scalar const& fill_value,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -189,7 +189,7 @@ std::unique_ptr<column> segmented_shift(
   device_span<size_type const> segment_offsets,
   size_type offset,
   scalar const& fill_value,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -200,7 +200,7 @@ std::unique_ptr<column> segmented_shift(
 std::vector<packed_table> contiguous_split(
   cudf::table_view const& input,
   std::vector<size_type> const& splits,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -209,7 +209,7 @@ std::vector<packed_table> contiguous_split(
  * @param stream Optional CUDA stream on which to execute kernels
  **/
 packed_columns pack(cudf::table_view const& input,
-                    rmm::cuda_stream_view stream        = cudf::default_stream_value,
+                    rmm::cuda_stream_view stream        = cudf::get_default_stream(),
                     rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -222,7 +222,7 @@ std::unique_ptr<column> allocate_like(
   column_view const& input,
   size_type size,
   mask_allocation_policy mask_alloc   = mask_allocation_policy::RETAIN,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -235,7 +235,7 @@ std::unique_ptr<column> copy_if_else(
   column_view const& lhs,
   column_view const& rhs,
   column_view const& boolean_mask,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -248,7 +248,7 @@ std::unique_ptr<column> copy_if_else(
   scalar const& lhs,
   column_view const& rhs,
   column_view const& boolean_mask,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -261,7 +261,7 @@ std::unique_ptr<column> copy_if_else(
   column_view const& lhs,
   scalar const& rhs,
   column_view const& boolean_mask,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -274,7 +274,7 @@ std::unique_ptr<column> copy_if_else(
   scalar const& lhs,
   scalar const& rhs,
   column_view const& boolean_mask,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -287,7 +287,7 @@ std::unique_ptr<table> sample(
   size_type const n,
   sample_with_replacement replacement = sample_with_replacement::FALSE,
   int64_t const seed                  = 0,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -298,7 +298,7 @@ std::unique_ptr<table> sample(
 std::unique_ptr<scalar> get_element(
   column_view const& input,
   size_type index,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -307,7 +307,7 @@ std::unique_ptr<scalar> get_element(
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 bool has_nonempty_nulls(column_view const& input,
-                        rmm::cuda_stream_view stream = cudf::default_stream_value);
+                        rmm::cuda_stream_view stream = cudf::get_default_stream());
 
 /**
  * @copydoc cudf::may_have_nonempty_nulls
@@ -315,7 +315,7 @@ bool has_nonempty_nulls(column_view const& input,
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 bool may_have_nonempty_nulls(column_view const& input,
-                             rmm::cuda_stream_view stream = cudf::default_stream_value);
+                             rmm::cuda_stream_view stream = cudf::get_default_stream());
 
 }  // namespace detail
 }  // namespace cudf
diff --git a/cpp/include/cudf/detail/copy_if.cuh b/cpp/include/cudf/detail/copy_if.cuh
index 99d9f5181c7..229d96659df 100644
--- a/cpp/include/cudf/detail/copy_if.cuh
+++ b/cpp/include/cudf/detail/copy_if.cuh
@@ -323,7 +323,7 @@ template <typename Filter>
 std::unique_ptr<table> copy_if(
   table_view const& input,
   Filter filter,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   CUDF_FUNC_RANGE();
diff --git a/cpp/include/cudf/detail/copy_range.cuh b/cpp/include/cudf/detail/copy_range.cuh
index aaba729f2f2..09cbf706d5c 100644
--- a/cpp/include/cudf/detail/copy_range.cuh
+++ b/cpp/include/cudf/detail/copy_range.cuh
@@ -135,7 +135,7 @@ void copy_range(SourceValueIterator source_value_begin,
                 mutable_column_view& target,
                 size_type target_begin,
                 size_type target_end,
-                rmm::cuda_stream_view stream = cudf::default_stream_value)
+                rmm::cuda_stream_view stream = cudf::get_default_stream())
 {
   CUDF_EXPECTS((target_begin <= target_end) && (target_begin >= 0) &&
                  (target_begin < target.size()) && (target_end <= target.size()),
@@ -196,7 +196,7 @@ void copy_range_in_place(column_view const& source,
                          size_type source_begin,
                          size_type source_end,
                          size_type target_begin,
-                         rmm::cuda_stream_view stream = cudf::default_stream_value);
+                         rmm::cuda_stream_view stream = cudf::get_default_stream());
 
 /**
  * @copydoc cudf::copy_range
@@ -209,7 +209,7 @@ std::unique_ptr<column> copy_range(
   size_type source_begin,
   size_type source_end,
   size_type target_begin,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/datetime.hpp b/cpp/include/cudf/detail/datetime.hpp
index d17e641533e..d2bca74ee9b 100644
--- a/cpp/include/cudf/detail/datetime.hpp
+++ b/cpp/include/cudf/detail/datetime.hpp
@@ -31,7 +31,7 @@ namespace detail {
  */
 std::unique_ptr<cudf::column> extract_year(
   cudf::column_view const& column,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -41,7 +41,7 @@ std::unique_ptr<cudf::column> extract_year(
  */
 std::unique_ptr<cudf::column> extract_month(
   cudf::column_view const& column,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -51,7 +51,7 @@ std::unique_ptr<cudf::column> extract_month(
  */
 std::unique_ptr<cudf::column> extract_day(
   cudf::column_view const& column,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -61,7 +61,7 @@ std::unique_ptr<cudf::column> extract_day(
  */
 std::unique_ptr<cudf::column> extract_weekday(
   cudf::column_view const& column,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -71,7 +71,7 @@ std::unique_ptr<cudf::column> extract_weekday(
  */
 std::unique_ptr<cudf::column> extract_hour(
   cudf::column_view const& column,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -81,7 +81,7 @@ std::unique_ptr<cudf::column> extract_hour(
  */
 std::unique_ptr<cudf::column> extract_minute(
   cudf::column_view const& column,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -91,7 +91,7 @@ std::unique_ptr<cudf::column> extract_minute(
  */
 std::unique_ptr<cudf::column> extract_second(
   cudf::column_view const& column,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -102,7 +102,7 @@ std::unique_ptr<cudf::column> extract_second(
  */
 std::unique_ptr<cudf::column> extract_millisecond_fraction(
   cudf::column_view const& column,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -113,7 +113,7 @@ std::unique_ptr<cudf::column> extract_millisecond_fraction(
  */
 std::unique_ptr<cudf::column> extract_microsecond_fraction(
   cudf::column_view const& column,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -124,7 +124,7 @@ std::unique_ptr<cudf::column> extract_microsecond_fraction(
  */
 std::unique_ptr<cudf::column> extract_nanosecond_fraction(
   cudf::column_view const& column,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -134,7 +134,7 @@ std::unique_ptr<cudf::column> extract_nanosecond_fraction(
  */
 std::unique_ptr<cudf::column> last_day_of_month(
   cudf::column_view const& column,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -144,7 +144,7 @@ std::unique_ptr<cudf::column> last_day_of_month(
  */
 std::unique_ptr<cudf::column> day_of_year(
   cudf::column_view const& column,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -156,7 +156,7 @@ std::unique_ptr<cudf::column> day_of_year(
 std::unique_ptr<cudf::column> add_calendrical_months(
   cudf::column_view const& timestamps,
   cudf::column_view const& months,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -168,7 +168,7 @@ std::unique_ptr<cudf::column> add_calendrical_months(
 std::unique_ptr<cudf::column> add_calendrical_months(
   cudf::column_view const& timestamps,
   cudf::scalar const& months,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -178,12 +178,12 @@ std::unique_ptr<cudf::column> add_calendrical_months(
  */
 std::unique_ptr<cudf::column> is_leap_year(
   cudf::column_view const& column,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 std::unique_ptr<cudf::column> extract_quarter(
   cudf::column_view const& column,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/fill.hpp b/cpp/include/cudf/detail/fill.hpp
index f236fa7fd43..3ac62c984fb 100644
--- a/cpp/include/cudf/detail/fill.hpp
+++ b/cpp/include/cudf/detail/fill.hpp
@@ -36,7 +36,7 @@ void fill_in_place(mutable_column_view& destination,
                    size_type begin,
                    size_type end,
                    scalar const& value,
-                   rmm::cuda_stream_view stream = cudf::default_stream_value);
+                   rmm::cuda_stream_view stream = cudf::get_default_stream());
 
 /**
  * @copydoc cudf::fill
@@ -48,7 +48,7 @@ std::unique_ptr<column> fill(
   size_type begin,
   size_type end,
   scalar const& value,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh
index 8bb117c3dd0..2acdc007afa 100644
--- a/cpp/include/cudf/detail/gather.cuh
+++ b/cpp/include/cudf/detail/gather.cuh
@@ -652,7 +652,7 @@ std::unique_ptr<table> gather(
   MapIterator gather_map_begin,
   MapIterator gather_map_end,
   out_of_bounds_policy bounds_policy  = out_of_bounds_policy::DONT_CHECK,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   std::vector<std::unique_ptr<column>> destination_columns;
diff --git a/cpp/include/cudf/detail/gather.hpp b/cpp/include/cudf/detail/gather.hpp
index fccad73591e..2f6a9525b4e 100644
--- a/cpp/include/cudf/detail/gather.hpp
+++ b/cpp/include/cudf/detail/gather.hpp
@@ -66,7 +66,7 @@ std::unique_ptr<table> gather(
   column_view const& gather_map,
   out_of_bounds_policy bounds_policy,
   negative_index_policy neg_indices,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -81,7 +81,7 @@ std::unique_ptr<table> gather(
   device_span<size_type const> const gather_map,
   out_of_bounds_policy bounds_policy,
   negative_index_policy neg_indices,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/groupby/group_replace_nulls.hpp b/cpp/include/cudf/detail/groupby/group_replace_nulls.hpp
index faf92c996d1..6742e7d9159 100644
--- a/cpp/include/cudf/detail/groupby/group_replace_nulls.hpp
+++ b/cpp/include/cudf/detail/groupby/group_replace_nulls.hpp
@@ -40,7 +40,7 @@ std::unique_ptr<column> group_replace_nulls(
   cudf::column_view const& grouped_value,
   device_span<size_type const> group_labels,
   cudf::replace_policy replace_policy,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/hashing.hpp b/cpp/include/cudf/detail/hashing.hpp
index 66cbf24e607..98d3713c5c5 100644
--- a/cpp/include/cudf/detail/hashing.hpp
+++ b/cpp/include/cudf/detail/hashing.hpp
@@ -35,24 +35,24 @@ std::unique_ptr<column> hash(
   table_view const& input,
   hash_id hash_function               = hash_id::HASH_MURMUR3,
   uint32_t seed                       = cudf::DEFAULT_HASH_SEED,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 std::unique_ptr<column> murmur_hash3_32(
   table_view const& input,
   uint32_t seed                       = cudf::DEFAULT_HASH_SEED,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 std::unique_ptr<column> spark_murmur_hash3_32(
   table_view const& input,
   uint32_t seed                       = cudf::DEFAULT_HASH_SEED,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 std::unique_ptr<column> md5_hash(
   table_view const& input,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /* Copyright 2005-2014 Daniel James.
diff --git a/cpp/include/cudf/detail/interop.hpp b/cpp/include/cudf/detail/interop.hpp
index 1417be358de..3d22530f5b3 100644
--- a/cpp/include/cudf/detail/interop.hpp
+++ b/cpp/include/cudf/detail/interop.hpp
@@ -34,7 +34,7 @@ namespace detail {
  */
 std::unique_ptr<table> from_dlpack(
   DLManagedTensor const* managed_tensor,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -44,7 +44,7 @@ std::unique_ptr<table> from_dlpack(
  */
 DLManagedTensor* to_dlpack(
   table_view const& input,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 // Creating arrow as per given type_id and buffer arguments
@@ -104,7 +104,7 @@ data_type arrow_to_cudf_type(arrow::DataType const& arrow_type);
  */
 std::shared_ptr<arrow::Table> to_arrow(table_view input,
                                        std::vector<column_metadata> const& metadata = {},
-                                       rmm::cuda_stream_view stream = cudf::default_stream_value,
+                                       rmm::cuda_stream_view stream = cudf::get_default_stream(),
                                        arrow::MemoryPool* ar_mr     = arrow::default_memory_pool());
 
 /**
@@ -114,7 +114,7 @@ std::shared_ptr<arrow::Table> to_arrow(table_view input,
  */
 std::unique_ptr<table> from_arrow(
   arrow::Table const& input_table,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/is_element_valid.hpp b/cpp/include/cudf/detail/is_element_valid.hpp
index f9f42bdae1d..e70fa8cfe5f 100644
--- a/cpp/include/cudf/detail/is_element_valid.hpp
+++ b/cpp/include/cudf/detail/is_element_valid.hpp
@@ -41,7 +41,7 @@ namespace detail {
 
 bool is_element_valid_sync(column_view const& col_view,
                            size_type element_index,
-                           rmm::cuda_stream_view stream = cudf::default_stream_value);
+                           rmm::cuda_stream_view stream = cudf::get_default_stream());
 
 }  // namespace detail
 }  // namespace cudf
diff --git a/cpp/include/cudf/detail/join.hpp b/cpp/include/cudf/detail/join.hpp
index a0385674f36..51cda214f7b 100644
--- a/cpp/include/cudf/detail/join.hpp
+++ b/cpp/include/cudf/detail/join.hpp
@@ -91,7 +91,7 @@ struct hash_join {
    */
   hash_join(cudf::table_view const& build,
             cudf::null_equality compare_nulls,
-            rmm::cuda_stream_view stream = cudf::default_stream_value);
+            rmm::cuda_stream_view stream = cudf::get_default_stream());
 
   /**
    * @copydoc cudf::hash_join::inner_join
diff --git a/cpp/include/cudf/detail/label_bins.hpp b/cpp/include/cudf/detail/label_bins.hpp
index 846893b70f6..af9f5fb82f5 100644
--- a/cpp/include/cudf/detail/label_bins.hpp
+++ b/cpp/include/cudf/detail/label_bins.hpp
@@ -51,7 +51,7 @@ std::unique_ptr<column> label_bins(
   inclusive left_inclusive,
   column_view const& right_edges,
   inclusive right_inclusive,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /** @} */  // end of group
diff --git a/cpp/include/cudf/detail/null_mask.hpp b/cpp/include/cudf/detail/null_mask.hpp
index 5d4f62e0feb..f75e3b06ccf 100644
--- a/cpp/include/cudf/detail/null_mask.hpp
+++ b/cpp/include/cudf/detail/null_mask.hpp
@@ -34,7 +34,7 @@ namespace detail {
 rmm::device_buffer create_null_mask(
   size_type size,
   mask_state state,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -46,7 +46,7 @@ void set_null_mask(bitmask_type* bitmask,
                    size_type begin_bit,
                    size_type end_bit,
                    bool valid,
-                   rmm::cuda_stream_view stream = cudf::default_stream_value);
+                   rmm::cuda_stream_view stream = cudf::get_default_stream());
 
 /**
  * @brief Given a bitmask, counts the number of set (1) bits in the range
diff --git a/cpp/include/cudf/detail/quantiles.hpp b/cpp/include/cudf/detail/quantiles.hpp
index 82b8ff35bfc..c75b2d135d8 100644
--- a/cpp/include/cudf/detail/quantiles.hpp
+++ b/cpp/include/cudf/detail/quantiles.hpp
@@ -35,7 +35,7 @@ std::unique_ptr<column> quantile(
   interpolation interp                = interpolation::LINEAR,
   column_view const& ordered_indices  = {},
   bool exact                          = true,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -50,7 +50,7 @@ std::unique_ptr<table> quantiles(
   cudf::sorted is_input_sorted                   = sorted::NO,
   std::vector<order> const& column_order         = {},
   std::vector<null_order> const& null_precedence = {},
-  rmm::cuda_stream_view stream                   = cudf::default_stream_value,
+  rmm::cuda_stream_view stream                   = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr            = rmm::mr::get_current_device_resource());
 
 /**
@@ -62,7 +62,7 @@ std::unique_ptr<table> quantiles(
 std::unique_ptr<column> percentile_approx(
   tdigest::tdigest_column_view const& input,
   column_view const& percentiles,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/reduction_functions.hpp b/cpp/include/cudf/detail/reduction_functions.hpp
index 7877fe13951..fa6652b0db3 100644
--- a/cpp/include/cudf/detail/reduction_functions.hpp
+++ b/cpp/include/cudf/detail/reduction_functions.hpp
@@ -46,7 +46,7 @@ std::unique_ptr<scalar> sum(
   column_view const& col,
   data_type const output_dtype,
   std::optional<std::reference_wrapper<scalar const>> init,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -67,7 +67,7 @@ std::unique_ptr<scalar> min(
   column_view const& col,
   data_type const output_dtype,
   std::optional<std::reference_wrapper<scalar const>> init,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -88,7 +88,7 @@ std::unique_ptr<scalar> max(
   column_view const& col,
   data_type const output_dtype,
   std::optional<std::reference_wrapper<scalar const>> init,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -110,7 +110,7 @@ std::unique_ptr<scalar> any(
   column_view const& col,
   data_type const output_dtype,
   std::optional<std::reference_wrapper<scalar const>> init,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -132,7 +132,7 @@ std::unique_ptr<scalar> all(
   column_view const& col,
   data_type const output_dtype,
   std::optional<std::reference_wrapper<scalar const>> init,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -154,7 +154,7 @@ std::unique_ptr<scalar> product(
   column_view const& col,
   data_type const output_dtype,
   std::optional<std::reference_wrapper<scalar const>> init,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -174,7 +174,7 @@ std::unique_ptr<scalar> product(
 std::unique_ptr<scalar> sum_of_squares(
   column_view const& col,
   data_type const output_dtype,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -194,7 +194,7 @@ std::unique_ptr<scalar> sum_of_squares(
 std::unique_ptr<scalar> mean(
   column_view const& col,
   data_type const output_dtype,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -217,7 +217,7 @@ std::unique_ptr<scalar> variance(
   column_view const& col,
   data_type const output_dtype,
   cudf::size_type ddof,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -240,7 +240,7 @@ std::unique_ptr<scalar> standard_deviation(
   column_view const& col,
   data_type const output_dtype,
   cudf::size_type ddof,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -271,7 +271,7 @@ std::unique_ptr<scalar> nth_element(
   column_view const& col,
   size_type n,
   null_policy null_handling,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -286,7 +286,7 @@ std::unique_ptr<scalar> nth_element(
 std::unique_ptr<scalar> collect_list(
   column_view const& col,
   null_policy null_handling,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -299,7 +299,7 @@ std::unique_ptr<scalar> collect_list(
  */
 std::unique_ptr<scalar> merge_lists(
   lists_column_view const& col,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -318,7 +318,7 @@ std::unique_ptr<scalar> collect_set(
   null_policy null_handling,
   null_equality nulls_equal,
   nan_equality nans_equal,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -335,7 +335,7 @@ std::unique_ptr<scalar> merge_sets(
   lists_column_view const& col,
   null_equality nulls_equal,
   nan_equality nans_equal,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -363,7 +363,7 @@ std::unique_ptr<column> segmented_sum(
   data_type const output_dtype,
   null_policy null_handling,
   std::optional<std::reference_wrapper<scalar const>> init,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -391,7 +391,7 @@ std::unique_ptr<column> segmented_product(
   data_type const output_dtype,
   null_policy null_handling,
   std::optional<std::reference_wrapper<scalar const>> init,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -418,7 +418,7 @@ std::unique_ptr<column> segmented_min(
   data_type const output_dtype,
   null_policy null_handling,
   std::optional<std::reference_wrapper<scalar const>> init,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -445,7 +445,7 @@ std::unique_ptr<column> segmented_max(
   data_type const output_dtype,
   null_policy null_handling,
   std::optional<std::reference_wrapper<scalar const>> init,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -473,7 +473,7 @@ std::unique_ptr<column> segmented_any(
   data_type const output_dtype,
   null_policy null_handling,
   std::optional<std::reference_wrapper<scalar const>> init,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -501,7 +501,7 @@ std::unique_ptr<column> segmented_all(
   data_type const output_dtype,
   null_policy null_handling,
   std::optional<std::reference_wrapper<scalar const>> init,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace reduction
diff --git a/cpp/include/cudf/detail/repeat.hpp b/cpp/include/cudf/detail/repeat.hpp
index 9bd03878579..39a0de1bd31 100644
--- a/cpp/include/cudf/detail/repeat.hpp
+++ b/cpp/include/cudf/detail/repeat.hpp
@@ -36,7 +36,7 @@ std::unique_ptr<table> repeat(
   table_view const& input_table,
   column_view const& count,
   bool check_count,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -48,7 +48,7 @@ std::unique_ptr<table> repeat(
 std::unique_ptr<table> repeat(
   table_view const& input_table,
   size_type count,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/replace.hpp b/cpp/include/cudf/detail/replace.hpp
index 4c2c6e3b171..8e6e0729d07 100644
--- a/cpp/include/cudf/detail/replace.hpp
+++ b/cpp/include/cudf/detail/replace.hpp
@@ -34,7 +34,7 @@ namespace detail {
 std::unique_ptr<column> replace_nulls(
   column_view const& input,
   cudf::column_view const& replacement,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -46,7 +46,7 @@ std::unique_ptr<column> replace_nulls(
 std::unique_ptr<column> replace_nulls(
   column_view const& input,
   scalar const& replacement,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -58,7 +58,7 @@ std::unique_ptr<column> replace_nulls(
 std::unique_ptr<column> replace_nulls(
   column_view const& input,
   replace_policy const& replace_policy,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -70,7 +70,7 @@ std::unique_ptr<column> replace_nulls(
 std::unique_ptr<column> replace_nans(
   column_view const& input,
   column_view const& replacement,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -82,7 +82,7 @@ std::unique_ptr<column> replace_nans(
 std::unique_ptr<column> replace_nans(
   column_view const& input,
   scalar const& replacement,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -94,7 +94,7 @@ std::unique_ptr<column> find_and_replace_all(
   column_view const& input_col,
   column_view const& values_to_replace,
   column_view const& replacement_values,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -104,7 +104,7 @@ std::unique_ptr<column> find_and_replace_all(
  */
 std::unique_ptr<column> normalize_nans_and_zeros(
   column_view const& input,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/reshape.hpp b/cpp/include/cudf/detail/reshape.hpp
index be10b2c582d..205761d6888 100644
--- a/cpp/include/cudf/detail/reshape.hpp
+++ b/cpp/include/cudf/detail/reshape.hpp
@@ -33,7 +33,7 @@ namespace detail {
 std::unique_ptr<table> tile(
   table_view const& input,
   size_type count,
-  rmm::cuda_stream_view               = cudf::default_stream_value,
+  rmm::cuda_stream_view               = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -43,7 +43,7 @@ std::unique_ptr<table> tile(
  */
 std::unique_ptr<column> interleave_columns(
   table_view const& input,
-  rmm::cuda_stream_view               = cudf::default_stream_value,
+  rmm::cuda_stream_view               = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/rolling.hpp b/cpp/include/cudf/detail/rolling.hpp
index e0bdde98c0a..40bedf4046d 100644
--- a/cpp/include/cudf/detail/rolling.hpp
+++ b/cpp/include/cudf/detail/rolling.hpp
@@ -45,7 +45,7 @@ std::unique_ptr<column> rolling_window(
   column_view const& following_window,
   size_type min_periods,
   rolling_aggregation const& agg,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/round.hpp b/cpp/include/cudf/detail/round.hpp
index 49e6c528eb3..89c9ce6d0e7 100644
--- a/cpp/include/cudf/detail/round.hpp
+++ b/cpp/include/cudf/detail/round.hpp
@@ -35,7 +35,7 @@ std::unique_ptr<column> round(
   column_view const& input,
   int32_t decimal_places,
   rounding_method method,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh
index 413f4c4dae4..af4854965ee 100644
--- a/cpp/include/cudf/detail/scatter.cuh
+++ b/cpp/include/cudf/detail/scatter.cuh
@@ -390,7 +390,7 @@ std::unique_ptr<table> scatter(
   MapIterator scatter_map_begin,
   MapIterator scatter_map_end,
   table_view const& target,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   CUDF_FUNC_RANGE();
diff --git a/cpp/include/cudf/detail/scatter.hpp b/cpp/include/cudf/detail/scatter.hpp
index 801088b803c..515df255f4a 100644
--- a/cpp/include/cudf/detail/scatter.hpp
+++ b/cpp/include/cudf/detail/scatter.hpp
@@ -63,7 +63,7 @@ std::unique_ptr<table> scatter(
   table_view const& source,
   column_view const& scatter_map,
   table_view const& target,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -76,7 +76,7 @@ std::unique_ptr<table> scatter(
   table_view const& source,
   device_span<size_type const> const scatter_map,
   table_view const& target,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -112,7 +112,7 @@ std::unique_ptr<table> scatter(
   std::vector<std::reference_wrapper<const scalar>> const& source,
   column_view const& indices,
   table_view const& target,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -127,7 +127,7 @@ std::unique_ptr<table> boolean_mask_scatter(
   table_view const& source,
   table_view const& target,
   column_view const& boolean_mask,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -143,7 +143,7 @@ std::unique_ptr<table> boolean_mask_scatter(
   std::vector<std::reference_wrapper<const scalar>> const& source,
   table_view const& target,
   column_view const& boolean_mask,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/sequence.hpp b/cpp/include/cudf/detail/sequence.hpp
index 8b3ef46d0ad..a4bebb1886c 100644
--- a/cpp/include/cudf/detail/sequence.hpp
+++ b/cpp/include/cudf/detail/sequence.hpp
@@ -36,7 +36,7 @@ std::unique_ptr<column> sequence(
   size_type size,
   scalar const& init,
   scalar const& step,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -49,7 +49,7 @@ std::unique_ptr<column> sequence(
 std::unique_ptr<column> sequence(
   size_type size,
   scalar const& init,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -64,7 +64,7 @@ std::unique_ptr<cudf::column> calendrical_month_sequence(
   size_type size,
   scalar const& init,
   size_type months,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/sorting.hpp b/cpp/include/cudf/detail/sorting.hpp
index a68407d9194..66b3f5071c6 100644
--- a/cpp/include/cudf/detail/sorting.hpp
+++ b/cpp/include/cudf/detail/sorting.hpp
@@ -36,7 +36,7 @@ std::unique_ptr<column> sorted_order(
   table_view const& input,
   std::vector<order> const& column_order         = {},
   std::vector<null_order> const& null_precedence = {},
-  rmm::cuda_stream_view stream                   = cudf::default_stream_value,
+  rmm::cuda_stream_view stream                   = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr            = rmm::mr::get_current_device_resource());
 
 /**
@@ -48,7 +48,7 @@ std::unique_ptr<column> stable_sorted_order(
   table_view const& input,
   std::vector<order> const& column_order         = {},
   std::vector<null_order> const& null_precedence = {},
-  rmm::cuda_stream_view stream                   = cudf::default_stream_value,
+  rmm::cuda_stream_view stream                   = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr            = rmm::mr::get_current_device_resource());
 
 /**
@@ -61,7 +61,7 @@ std::unique_ptr<table> sort_by_key(
   table_view const& keys,
   std::vector<order> const& column_order         = {},
   std::vector<null_order> const& null_precedence = {},
-  rmm::cuda_stream_view stream                   = cudf::default_stream_value,
+  rmm::cuda_stream_view stream                   = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr            = rmm::mr::get_current_device_resource());
 
 /**
@@ -74,7 +74,7 @@ std::unique_ptr<table> stable_sort_by_key(
   table_view const& keys,
   std::vector<order> const& column_order         = {},
   std::vector<null_order> const& null_precedence = {},
-  rmm::cuda_stream_view stream                   = cudf::default_stream_value,
+  rmm::cuda_stream_view stream                   = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr            = rmm::mr::get_current_device_resource());
 
 /**
@@ -87,7 +87,7 @@ std::unique_ptr<column> segmented_sorted_order(
   column_view const& segment_offsets,
   std::vector<order> const& column_order         = {},
   std::vector<null_order> const& null_precedence = {},
-  rmm::cuda_stream_view stream                   = cudf::default_stream_value,
+  rmm::cuda_stream_view stream                   = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr            = rmm::mr::get_current_device_resource());
 
 /**
@@ -100,7 +100,7 @@ std::unique_ptr<column> stable_segmented_sorted_order(
   column_view const& segment_offsets,
   std::vector<order> const& column_order         = {},
   std::vector<null_order> const& null_precedence = {},
-  rmm::cuda_stream_view stream                   = cudf::default_stream_value,
+  rmm::cuda_stream_view stream                   = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr            = rmm::mr::get_current_device_resource());
 
 /**
@@ -114,7 +114,7 @@ std::unique_ptr<table> segmented_sort_by_key(
   column_view const& segment_offsets,
   std::vector<order> const& column_order         = {},
   std::vector<null_order> const& null_precedence = {},
-  rmm::cuda_stream_view stream                   = cudf::default_stream_value,
+  rmm::cuda_stream_view stream                   = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr            = rmm::mr::get_current_device_resource());
 
 /**
@@ -128,7 +128,7 @@ std::unique_ptr<table> stable_segmented_sort_by_key(
   column_view const& segment_offsets,
   std::vector<order> const& column_order         = {},
   std::vector<null_order> const& null_precedence = {},
-  rmm::cuda_stream_view stream                   = cudf::default_stream_value,
+  rmm::cuda_stream_view stream                   = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr            = rmm::mr::get_current_device_resource());
 
 /**
@@ -140,7 +140,7 @@ std::unique_ptr<table> sort(
   table_view const& values,
   std::vector<order> const& column_order         = {},
   std::vector<null_order> const& null_precedence = {},
-  rmm::cuda_stream_view stream                   = cudf::default_stream_value,
+  rmm::cuda_stream_view stream                   = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr            = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/stream_compaction.hpp b/cpp/include/cudf/detail/stream_compaction.hpp
index 0db929c523c..1651e8b33b6 100644
--- a/cpp/include/cudf/detail/stream_compaction.hpp
+++ b/cpp/include/cudf/detail/stream_compaction.hpp
@@ -36,7 +36,7 @@ std::unique_ptr<table> drop_nulls(
   table_view const& input,
   std::vector<size_type> const& keys,
   cudf::size_type keep_threshold,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -49,7 +49,7 @@ std::unique_ptr<table> drop_nans(
   table_view const& input,
   std::vector<size_type> const& keys,
   cudf::size_type keep_threshold,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -60,7 +60,7 @@ std::unique_ptr<table> drop_nans(
 std::unique_ptr<table> apply_boolean_mask(
   table_view const& input,
   column_view const& boolean_mask,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -73,7 +73,7 @@ std::unique_ptr<table> unique(
   std::vector<size_type> const& keys,
   duplicate_keep_option keep,
   null_equality nulls_equal           = null_equality::EQUAL,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -87,7 +87,7 @@ std::unique_ptr<table> distinct(
   duplicate_keep_option keep          = duplicate_keep_option::KEEP_ANY,
   null_equality nulls_equal           = null_equality::EQUAL,
   nan_equality nans_equal             = nan_equality::ALL_EQUAL,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -116,7 +116,7 @@ std::unique_ptr<table> stable_distinct(
   duplicate_keep_option keep          = duplicate_keep_option::KEEP_ANY,
   null_equality nulls_equal           = null_equality::EQUAL,
   nan_equality nans_equal             = nan_equality::ALL_EQUAL,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -138,7 +138,7 @@ rmm::device_uvector<size_type> get_distinct_indices(
   duplicate_keep_option keep          = duplicate_keep_option::KEEP_ANY,
   null_equality nulls_equal           = null_equality::EQUAL,
   nan_equality nans_equal             = nan_equality::ALL_EQUAL,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -149,7 +149,7 @@ rmm::device_uvector<size_type> get_distinct_indices(
 cudf::size_type unique_count(column_view const& input,
                              null_policy null_handling,
                              nan_policy nan_handling,
-                             rmm::cuda_stream_view stream = cudf::default_stream_value);
+                             rmm::cuda_stream_view stream = cudf::get_default_stream());
 
 /**
  * @copydoc cudf::unique_count(table_view const&, null_equality)
@@ -158,7 +158,7 @@ cudf::size_type unique_count(column_view const& input,
  */
 cudf::size_type unique_count(table_view const& input,
                              null_equality nulls_equal    = null_equality::EQUAL,
-                             rmm::cuda_stream_view stream = cudf::default_stream_value);
+                             rmm::cuda_stream_view stream = cudf::get_default_stream());
 
 /**
  * @copydoc cudf::distinct_count(column_view const&, null_policy, nan_policy)
@@ -168,7 +168,7 @@ cudf::size_type unique_count(table_view const& input,
 cudf::size_type distinct_count(column_view const& input,
                                null_policy null_handling,
                                nan_policy nan_handling,
-                               rmm::cuda_stream_view stream = cudf::default_stream_value);
+                               rmm::cuda_stream_view stream = cudf::get_default_stream());
 
 /**
  * @copydoc cudf::distinct_count(table_view const&, null_equality)
@@ -177,7 +177,7 @@ cudf::size_type distinct_count(column_view const& input,
  */
 cudf::size_type distinct_count(table_view const& input,
                                null_equality nulls_equal    = null_equality::EQUAL,
-                               rmm::cuda_stream_view stream = cudf::default_stream_value);
+                               rmm::cuda_stream_view stream = cudf::get_default_stream());
 
 }  // namespace detail
 }  // namespace cudf
diff --git a/cpp/include/cudf/detail/structs/utilities.hpp b/cpp/include/cudf/detail/structs/utilities.hpp
index 1a4b8f02dd3..03e752c102d 100644
--- a/cpp/include/cudf/detail/structs/utilities.hpp
+++ b/cpp/include/cudf/detail/structs/utilities.hpp
@@ -189,7 +189,7 @@ void superimpose_parent_nulls(bitmask_type const* parent_null_mask,
  */
 std::tuple<cudf::column_view, std::vector<rmm::device_buffer>> superimpose_parent_nulls(
   column_view const& parent,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -215,7 +215,7 @@ std::tuple<cudf::column_view, std::vector<rmm::device_buffer>> superimpose_paren
  */
 std::tuple<cudf::table_view, std::vector<rmm::device_buffer>> superimpose_parent_nulls(
   table_view const& table,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
diff --git a/cpp/include/cudf/detail/tdigest/tdigest.hpp b/cpp/include/cudf/detail/tdigest/tdigest.hpp
index 41e734ffe83..f1b795e21a9 100644
--- a/cpp/include/cudf/detail/tdigest/tdigest.hpp
+++ b/cpp/include/cudf/detail/tdigest/tdigest.hpp
@@ -139,7 +139,7 @@ std::unique_ptr<column> make_tdigest_column(
   std::unique_ptr<column>&& tdigest_offsets,
   std::unique_ptr<column>&& min_values,
   std::unique_ptr<column>&& max_values,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -153,7 +153,7 @@ std::unique_ptr<column> make_tdigest_column(
  * @returns An empty tdigest column.
  */
 std::unique_ptr<column> make_empty_tdigest_column(
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -167,7 +167,7 @@ std::unique_ptr<column> make_empty_tdigest_column(
  * @returns An empty tdigest scalar.
  */
 std::unique_ptr<scalar> make_empty_tdigest_scalar(
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
diff --git a/cpp/include/cudf/detail/transform.hpp b/cpp/include/cudf/detail/transform.hpp
index 929c4700873..26cdf917cda 100644
--- a/cpp/include/cudf/detail/transform.hpp
+++ b/cpp/include/cudf/detail/transform.hpp
@@ -34,7 +34,7 @@ std::unique_ptr<column> transform(
   std::string const& unary_udf,
   data_type output_type,
   bool is_ptx,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -45,7 +45,7 @@ std::unique_ptr<column> transform(
 std::unique_ptr<column> compute_column(
   table_view const table,
   ast::operation const& expr,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -55,7 +55,7 @@ std::unique_ptr<column> compute_column(
  */
 std::pair<std::unique_ptr<rmm::device_buffer>, size_type> nans_to_nulls(
   column_view const& input,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -65,7 +65,7 @@ std::pair<std::unique_ptr<rmm::device_buffer>, size_type> nans_to_nulls(
  */
 std::pair<std::unique_ptr<rmm::device_buffer>, cudf::size_type> bools_to_mask(
   column_view const& input,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -75,7 +75,7 @@ std::pair<std::unique_ptr<rmm::device_buffer>, cudf::size_type> bools_to_mask(
  */
 std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::column>> encode(
   cudf::table_view const& input,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -86,7 +86,7 @@ std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::column>> encode(
 std::pair<std::unique_ptr<column>, table_view> one_hot_encode(
   column_view const& input,
   column_view const& categories,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -98,7 +98,7 @@ std::unique_ptr<column> mask_to_bools(
   bitmask_type const* null_mask,
   size_type begin_bit,
   size_type end_bit,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -108,7 +108,7 @@ std::unique_ptr<column> mask_to_bools(
  */
 std::unique_ptr<column> row_bit_count(
   table_view const& t,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/transpose.hpp b/cpp/include/cudf/detail/transpose.hpp
index 367421a5ee1..14f80a99de9 100644
--- a/cpp/include/cudf/detail/transpose.hpp
+++ b/cpp/include/cudf/detail/transpose.hpp
@@ -30,7 +30,7 @@ namespace detail {
  */
 std::pair<std::unique_ptr<column>, table_view> transpose(
   table_view const& input,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/unary.hpp b/cpp/include/cudf/detail/unary.hpp
index 5d1c29aba78..c92b4f7683b 100644
--- a/cpp/include/cudf/detail/unary.hpp
+++ b/cpp/include/cudf/detail/unary.hpp
@@ -50,7 +50,7 @@ std::unique_ptr<column> true_if(
   InputIterator end,
   size_type size,
   Predicate p,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   auto output =
@@ -71,7 +71,7 @@ std::unique_ptr<column> true_if(
 std::unique_ptr<cudf::column> unary_operation(
   cudf::column_view const& input,
   cudf::unary_operator op,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -82,7 +82,7 @@ std::unique_ptr<cudf::column> unary_operation(
 std::unique_ptr<column> cast(
   column_view const& input,
   data_type type,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -92,7 +92,7 @@ std::unique_ptr<column> cast(
  */
 std::unique_ptr<column> is_nan(
   cudf::column_view const& input,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -102,7 +102,7 @@ std::unique_ptr<column> is_nan(
  */
 std::unique_ptr<column> is_not_nan(
   cudf::column_view const& input,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/utilities/cuda.cuh b/cpp/include/cudf/detail/utilities/cuda.cuh
index d57078f892f..02564ea1343 100644
--- a/cpp/include/cudf/detail/utilities/cuda.cuh
+++ b/cpp/include/cudf/detail/utilities/cuda.cuh
@@ -171,7 +171,7 @@ __global__ void single_thread_kernel(F f)
  */
 template <class Functor>
 void device_single_thread(Functor functor,
-                          rmm::cuda_stream_view stream = cudf::default_stream_value)
+                          rmm::cuda_stream_view stream = cudf::get_default_stream())
 {
   single_thread_kernel<<<1, 1, 0, stream.value()>>>(functor);
 }
diff --git a/cpp/include/cudf/detail/utilities/default_stream.hpp b/cpp/include/cudf/detail/utilities/default_stream.hpp
new file mode 100644
index 00000000000..fa438f142b7
--- /dev/null
+++ b/cpp/include/cudf/detail/utilities/default_stream.hpp
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include <rmm/cuda_stream.hpp>
+#include <rmm/cuda_stream_view.hpp>
+
+namespace cudf {
+
+namespace detail {
+
+/**
+ * @brief Default CUDA stream view used by cudf.
+ *
+ * Declaration only: `extern` without an initializer, so the object is defined
+ * elsewhere. Use it so the correct stream is chosen under per-thread default stream.
+ */
+extern rmm::cuda_stream_view const default_stream_value;
+
+}  // namespace detail
+
+}  // namespace cudf
diff --git a/cpp/include/cudf/detail/utilities/vector_factories.hpp b/cpp/include/cudf/detail/utilities/vector_factories.hpp
index d7fdb153c19..d59ecea8bb0 100644
--- a/cpp/include/cudf/detail/utilities/vector_factories.hpp
+++ b/cpp/include/cudf/detail/utilities/vector_factories.hpp
@@ -72,7 +72,7 @@ rmm::device_uvector<T> make_zeroed_device_uvector_async(
 template <typename T>
 rmm::device_uvector<T> make_zeroed_device_uvector_sync(
   std::size_t size,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   rmm::device_uvector<T> ret(size, stream, mr);
@@ -148,7 +148,7 @@ rmm::device_uvector<typename Container::value_type> make_device_uvector_async(
 template <typename T>
 rmm::device_uvector<T> make_device_uvector_async(
   device_span<T const> source_data,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   rmm::device_uvector<T> ret(source_data.size(), stream, mr);
@@ -201,7 +201,7 @@ rmm::device_uvector<typename Container::value_type> make_device_uvector_async(
 template <typename T>
 rmm::device_uvector<T> make_device_uvector_sync(
   host_span<T const> source_data,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   auto ret = make_device_uvector_async(source_data, stream, mr);
@@ -228,7 +228,7 @@ template <
     std::is_convertible_v<Container, host_span<typename Container::value_type const>>>* = nullptr>
 rmm::device_uvector<typename Container::value_type> make_device_uvector_sync(
   Container const& c,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   return make_device_uvector_sync(host_span<typename Container::value_type const>{c}, stream, mr);
@@ -249,7 +249,7 @@ rmm::device_uvector<typename Container::value_type> make_device_uvector_sync(
 template <typename T>
 rmm::device_uvector<T> make_device_uvector_sync(
   device_span<T const> source_data,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   auto ret = make_device_uvector_async(source_data, stream, mr);
@@ -276,7 +276,7 @@ template <
     std::is_convertible_v<Container, device_span<typename Container::value_type const>>>* = nullptr>
 rmm::device_uvector<typename Container::value_type> make_device_uvector_sync(
   Container const& c,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   return make_device_uvector_sync(device_span<typename Container::value_type const>{c}, stream, mr);
@@ -367,7 +367,7 @@ template <
   std::enable_if_t<
     std::is_convertible_v<Container, device_span<typename Container::value_type const>>>* = nullptr>
 std::vector<typename Container::value_type> make_std_vector_sync(
-  Container const& c, rmm::cuda_stream_view stream = cudf::default_stream_value)
+  Container const& c, rmm::cuda_stream_view stream = cudf::get_default_stream())
 {
   return make_std_vector_sync(device_span<typename Container::value_type const>{c}, stream);
 }
@@ -424,7 +424,7 @@ thrust::host_vector<typename Container::value_type> make_host_vector_async(
  */
 template <typename T>
 thrust::host_vector<T> make_host_vector_sync(
-  device_span<T const> v, rmm::cuda_stream_view stream = cudf::default_stream_value)
+  device_span<T const> v, rmm::cuda_stream_view stream = cudf::get_default_stream())
 {
   auto result = make_host_vector_async(v, stream);
   stream.synchronize();
@@ -448,7 +448,7 @@ template <
   std::enable_if_t<
     std::is_convertible_v<Container, device_span<typename Container::value_type const>>>* = nullptr>
 thrust::host_vector<typename Container::value_type> make_host_vector_sync(
-  Container const& c, rmm::cuda_stream_view stream = cudf::default_stream_value)
+  Container const& c, rmm::cuda_stream_view stream = cudf::get_default_stream())
 {
   return make_host_vector_sync(device_span<typename Container::value_type const>{c}, stream);
 }
diff --git a/cpp/include/cudf/detail/valid_if.cuh b/cpp/include/cudf/detail/valid_if.cuh
index 0fe7edad21d..56cc73e63e2 100644
--- a/cpp/include/cudf/detail/valid_if.cuh
+++ b/cpp/include/cudf/detail/valid_if.cuh
@@ -90,7 +90,7 @@ std::pair<rmm::device_buffer, size_type> valid_if(
   InputIterator begin,
   InputIterator end,
   Predicate p,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   CUDF_EXPECTS(begin <= end, "Invalid range.");
diff --git a/cpp/include/cudf/dictionary/detail/concatenate.hpp b/cpp/include/cudf/dictionary/detail/concatenate.hpp
index 9f154a054f8..e893e9d6499 100644
--- a/cpp/include/cudf/dictionary/detail/concatenate.hpp
+++ b/cpp/include/cudf/dictionary/detail/concatenate.hpp
@@ -39,7 +39,7 @@ namespace detail {
  */
 std::unique_ptr<column> concatenate(
   host_span<column_view const> columns,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/dictionary/detail/encode.hpp b/cpp/include/cudf/dictionary/detail/encode.hpp
index 17173564a9a..454b8400f87 100644
--- a/cpp/include/cudf/dictionary/detail/encode.hpp
+++ b/cpp/include/cudf/dictionary/detail/encode.hpp
@@ -54,7 +54,7 @@ namespace detail {
 std::unique_ptr<column> encode(
   column_view const& column,
   data_type indices_type              = data_type{type_id::UINT32},
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -74,7 +74,7 @@ std::unique_ptr<column> encode(
  */
 std::unique_ptr<column> decode(
   dictionary_column_view const& dictionary_column,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
diff --git a/cpp/include/cudf/dictionary/detail/replace.hpp b/cpp/include/cudf/dictionary/detail/replace.hpp
index 2b38a6c40ec..a13a5eee6cb 100644
--- a/cpp/include/cudf/dictionary/detail/replace.hpp
+++ b/cpp/include/cudf/dictionary/detail/replace.hpp
@@ -42,7 +42,7 @@ namespace detail {
 std::unique_ptr<column> replace_nulls(
   dictionary_column_view const& input,
   dictionary_column_view const& replacement,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -60,7 +60,7 @@ std::unique_ptr<column> replace_nulls(
 std::unique_ptr<column> replace_nulls(
   dictionary_column_view const& input,
   scalar const& replacement,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/dictionary/detail/search.hpp b/cpp/include/cudf/dictionary/detail/search.hpp
index 4f7939b32a7..9cf45eafc7d 100644
--- a/cpp/include/cudf/dictionary/detail/search.hpp
+++ b/cpp/include/cudf/dictionary/detail/search.hpp
@@ -34,7 +34,7 @@ namespace detail {
 std::unique_ptr<scalar> get_index(
   dictionary_column_view const& dictionary,
   scalar const& key,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -59,7 +59,7 @@ std::unique_ptr<scalar> get_index(
 std::unique_ptr<scalar> get_insert_index(
   dictionary_column_view const& dictionary,
   scalar const& key,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/dictionary/detail/update_keys.hpp b/cpp/include/cudf/dictionary/detail/update_keys.hpp
index 53fd71e0375..23681d36ee1 100644
--- a/cpp/include/cudf/dictionary/detail/update_keys.hpp
+++ b/cpp/include/cudf/dictionary/detail/update_keys.hpp
@@ -35,7 +35,7 @@ namespace detail {
 std::unique_ptr<column> add_keys(
   dictionary_column_view const& dictionary_column,
   column_view const& new_keys,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -47,7 +47,7 @@ std::unique_ptr<column> add_keys(
 std::unique_ptr<column> remove_keys(
   dictionary_column_view const& dictionary_column,
   column_view const& keys_to_remove,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -58,7 +58,7 @@ std::unique_ptr<column> remove_keys(
  */
 std::unique_ptr<column> remove_unused_keys(
   dictionary_column_view const& dictionary_column,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -70,7 +70,7 @@ std::unique_ptr<column> remove_unused_keys(
 std::unique_ptr<column> set_keys(
   dictionary_column_view const& dictionary_column,
   column_view const& keys,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -81,7 +81,7 @@ std::unique_ptr<column> set_keys(
  */
 std::vector<std::unique_ptr<column>> match_dictionaries(
   cudf::host_span<dictionary_column_view const> input,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -105,7 +105,7 @@ std::vector<std::unique_ptr<column>> match_dictionaries(
  */
 std::pair<std::vector<std::unique_ptr<column>>, std::vector<table_view>> match_dictionaries(
   std::vector<table_view> tables,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/dictionary/dictionary_factories.hpp b/cpp/include/cudf/dictionary/dictionary_factories.hpp
index b27fa25a27a..821981ad148 100644
--- a/cpp/include/cudf/dictionary/dictionary_factories.hpp
+++ b/cpp/include/cudf/dictionary/dictionary_factories.hpp
@@ -65,7 +65,7 @@ namespace cudf {
 std::unique_ptr<column> make_dictionary_column(
   column_view const& keys_column,
   column_view const& indices_column,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -117,7 +117,7 @@ std::unique_ptr<column> make_dictionary_column(std::unique_ptr<column> keys_colu
 std::unique_ptr<column> make_dictionary_column(
   std::unique_ptr<column> keys_column,
   std::unique_ptr<column> indices_column,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /** @} */  // end of group
diff --git a/cpp/include/cudf/io/detail/avro.hpp b/cpp/include/cudf/io/detail/avro.hpp
index 9551b1f05df..00665873b67 100644
--- a/cpp/include/cudf/io/detail/avro.hpp
+++ b/cpp/include/cudf/io/detail/avro.hpp
@@ -39,7 +39,7 @@ namespace avro {
 table_with_metadata read_avro(
   std::unique_ptr<cudf::io::datasource>&& source,
   avro_reader_options const& options,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace avro
diff --git a/cpp/include/cudf/io/detail/csv.hpp b/cpp/include/cudf/io/detail/csv.hpp
index 0d79ecd0d77..920b815ce12 100644
--- a/cpp/include/cudf/io/detail/csv.hpp
+++ b/cpp/include/cudf/io/detail/csv.hpp
@@ -55,7 +55,7 @@ void write_csv(data_sink* sink,
                table_view const& table,
                const table_metadata* metadata,
                csv_writer_options const& options,
-               rmm::cuda_stream_view stream        = cudf::default_stream_value,
+               rmm::cuda_stream_view stream        = cudf::get_default_stream(),
                rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace csv
diff --git a/cpp/include/cudf/io/detail/json.hpp b/cpp/include/cudf/io/detail/json.hpp
index 3e69ef8a3b8..6d0d23c3c78 100644
--- a/cpp/include/cudf/io/detail/json.hpp
+++ b/cpp/include/cudf/io/detail/json.hpp
@@ -39,7 +39,7 @@ namespace json {
 table_with_metadata read_json(
   std::vector<std::unique_ptr<cudf::io::datasource>>& sources,
   json_reader_options const& options,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace json
diff --git a/cpp/include/cudf/io/detail/orc.hpp b/cpp/include/cudf/io/detail/orc.hpp
index 79fcf4bd916..10bdf6e3e71 100644
--- a/cpp/include/cudf/io/detail/orc.hpp
+++ b/cpp/include/cudf/io/detail/orc.hpp
@@ -75,7 +75,7 @@ class reader {
    * @return The set of columns along with table metadata
    */
   table_with_metadata read(orc_reader_options const& options,
-                           rmm::cuda_stream_view stream = cudf::default_stream_value);
+                           rmm::cuda_stream_view stream = cudf::get_default_stream());
 };
 
 /**
diff --git a/cpp/include/cudf/join.hpp b/cpp/include/cudf/join.hpp
index bc3bfef3a7d..b613a661d95 100644
--- a/cpp/include/cudf/join.hpp
+++ b/cpp/include/cudf/join.hpp
@@ -287,7 +287,7 @@ class hash_join {
    */
   hash_join(cudf::table_view const& build,
             null_equality compare_nulls,
-            rmm::cuda_stream_view stream = cudf::default_stream_value);
+            rmm::cuda_stream_view stream = cudf::get_default_stream());
 
   /**
    * Returns the row indices that can be used to construct the result of performing
@@ -308,7 +308,7 @@ class hash_join {
             std::unique_ptr<rmm::device_uvector<size_type>>>
   inner_join(cudf::table_view const& probe,
              std::optional<std::size_t> output_size = {},
-             rmm::cuda_stream_view stream           = cudf::default_stream_value,
+             rmm::cuda_stream_view stream           = cudf::get_default_stream(),
              rmm::mr::device_memory_resource* mr    = rmm::mr::get_current_device_resource()) const;
 
   /**
@@ -330,7 +330,7 @@ class hash_join {
             std::unique_ptr<rmm::device_uvector<size_type>>>
   left_join(cudf::table_view const& probe,
             std::optional<std::size_t> output_size = {},
-            rmm::cuda_stream_view stream           = cudf::default_stream_value,
+            rmm::cuda_stream_view stream           = cudf::get_default_stream(),
             rmm::mr::device_memory_resource* mr    = rmm::mr::get_current_device_resource()) const;
 
   /**
@@ -352,7 +352,7 @@ class hash_join {
             std::unique_ptr<rmm::device_uvector<size_type>>>
   full_join(cudf::table_view const& probe,
             std::optional<std::size_t> output_size = {},
-            rmm::cuda_stream_view stream           = cudf::default_stream_value,
+            rmm::cuda_stream_view stream           = cudf::get_default_stream(),
             rmm::mr::device_memory_resource* mr    = rmm::mr::get_current_device_resource()) const;
 
   /**
@@ -366,7 +366,7 @@ class hash_join {
    * `build` and `probe` as the the join keys .
    */
   [[nodiscard]] std::size_t inner_join_size(
-    cudf::table_view const& probe, rmm::cuda_stream_view stream = cudf::default_stream_value) const;
+    cudf::table_view const& probe, rmm::cuda_stream_view stream = cudf::get_default_stream()) const;
 
   /**
    * Returns the exact number of matches (rows) when performing a left join with the specified probe
@@ -379,7 +379,7 @@ class hash_join {
    * and `probe` as the the join keys .
    */
   [[nodiscard]] std::size_t left_join_size(
-    cudf::table_view const& probe, rmm::cuda_stream_view stream = cudf::default_stream_value) const;
+    cudf::table_view const& probe, rmm::cuda_stream_view stream = cudf::get_default_stream()) const;
 
   /**
    * Returns the exact number of matches (rows) when performing a full join with the specified probe
@@ -395,7 +395,7 @@ class hash_join {
    */
   std::size_t full_join_size(
     cudf::table_view const& probe,
-    rmm::cuda_stream_view stream        = cudf::default_stream_value,
+    rmm::cuda_stream_view stream        = cudf::get_default_stream(),
     rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
 
  private:
diff --git a/cpp/include/cudf/lists/detail/concatenate.hpp b/cpp/include/cudf/lists/detail/concatenate.hpp
index e2e17579c85..f2982a67389 100644
--- a/cpp/include/cudf/lists/detail/concatenate.hpp
+++ b/cpp/include/cudf/lists/detail/concatenate.hpp
@@ -45,7 +45,7 @@ namespace detail {
  */
 std::unique_ptr<column> concatenate(
   host_span<column_view const> columns,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/lists/detail/gather.cuh b/cpp/include/cudf/lists/detail/gather.cuh
index 9cbe9582456..f53e8ca8033 100644
--- a/cpp/include/cudf/lists/detail/gather.cuh
+++ b/cpp/include/cudf/lists/detail/gather.cuh
@@ -321,7 +321,7 @@ std::unique_ptr<column> segmented_gather(
   lists_column_view const& source_column,
   lists_column_view const& gather_map_list,
   out_of_bounds_policy bounds_policy  = out_of_bounds_policy::DONT_CHECK,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/lists/detail/scatter.cuh b/cpp/include/cudf/lists/detail/scatter.cuh
index 2e60df4a5ae..c343eea1014 100644
--- a/cpp/include/cudf/lists/detail/scatter.cuh
+++ b/cpp/include/cudf/lists/detail/scatter.cuh
@@ -96,7 +96,7 @@ std::unique_ptr<column> scatter_impl(
   MapIterator scatter_map_end,
   column_view const& source,
   column_view const& target,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   CUDF_EXPECTS(column_types_equal(source, target), "Mismatched column types.");
@@ -169,7 +169,7 @@ std::unique_ptr<column> scatter(
   MapIterator scatter_map_begin,
   MapIterator scatter_map_end,
   column_view const& target,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   auto const num_rows = target.size();
@@ -226,7 +226,7 @@ std::unique_ptr<column> scatter(
   MapIterator scatter_map_begin,
   MapIterator scatter_map_end,
   column_view const& target,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   auto const num_rows = target.size();
diff --git a/cpp/include/cudf/lists/lists_column_factories.hpp b/cpp/include/cudf/lists/lists_column_factories.hpp
index 2b40a875cc9..e02fa3fde5f 100644
--- a/cpp/include/cudf/lists/lists_column_factories.hpp
+++ b/cpp/include/cudf/lists/lists_column_factories.hpp
@@ -38,7 +38,7 @@ namespace detail {
 std::unique_ptr<cudf::column> make_lists_column_from_scalar(
   list_scalar const& value,
   size_type size,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/partitioning.hpp b/cpp/include/cudf/partitioning.hpp
index 6e9f571cc9d..2c91bdf64f5 100644
--- a/cpp/include/cudf/partitioning.hpp
+++ b/cpp/include/cudf/partitioning.hpp
@@ -98,7 +98,7 @@ std::pair<std::unique_ptr<table>, std::vector<size_type>> hash_partition(
   int num_partitions,
   hash_id hash_function               = hash_id::HASH_MURMUR3,
   uint32_t seed                       = DEFAULT_HASH_SEED,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
diff --git a/cpp/include/cudf/scalar/scalar.hpp b/cpp/include/cudf/scalar/scalar.hpp
index 9b9c73071af..6161639a6fb 100644
--- a/cpp/include/cudf/scalar/scalar.hpp
+++ b/cpp/include/cudf/scalar/scalar.hpp
@@ -64,7 +64,7 @@ class scalar {
    * @param is_valid true: set the value to valid. false: set it to null.
    * @param stream CUDA stream used for device memory operations.
    */
-  void set_valid_async(bool is_valid, rmm::cuda_stream_view stream = cudf::default_stream_value);
+  void set_valid_async(bool is_valid, rmm::cuda_stream_view stream = cudf::get_default_stream());
 
   /**
    * @brief Indicates whether the scalar contains a valid value.
@@ -76,7 +76,7 @@ class scalar {
    * @return true Value is valid
    * @return false Value is invalid/null
    */
-  [[nodiscard]] bool is_valid(rmm::cuda_stream_view stream = cudf::default_stream_value) const;
+  [[nodiscard]] bool is_valid(rmm::cuda_stream_view stream = cudf::get_default_stream()) const;
 
   /**
    * @brief Returns a raw pointer to the validity bool in device memory.
@@ -112,7 +112,7 @@ class scalar {
    * @param mr Device memory resource to use for device memory allocation.
    */
   scalar(scalar const& other,
-         rmm::cuda_stream_view stream        = cudf::default_stream_value,
+         rmm::cuda_stream_view stream        = cudf::get_default_stream(),
          rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
   /**
@@ -128,7 +128,7 @@ class scalar {
    */
   scalar(data_type type,
          bool is_valid                       = false,
-         rmm::cuda_stream_view stream        = cudf::default_stream_value,
+         rmm::cuda_stream_view stream        = cudf::get_default_stream(),
          rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 };
 
@@ -164,7 +164,7 @@ class fixed_width_scalar : public scalar {
    * @param mr Device memory resource to use for device memory allocation.
    */
   fixed_width_scalar(fixed_width_scalar const& other,
-                     rmm::cuda_stream_view stream        = cudf::default_stream_value,
+                     rmm::cuda_stream_view stream        = cudf::get_default_stream(),
                      rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
   /**
@@ -173,7 +173,7 @@ class fixed_width_scalar : public scalar {
    * @param value New value of scalar.
    * @param stream CUDA stream used for device memory operations.
    */
-  void set_value(T value, rmm::cuda_stream_view stream = cudf::default_stream_value);
+  void set_value(T value, rmm::cuda_stream_view stream = cudf::get_default_stream());
 
   /**
    * @brief Explicit conversion operator to get the value of the scalar on the host.
@@ -186,7 +186,7 @@ class fixed_width_scalar : public scalar {
    * @param stream CUDA stream used for device memory operations.
    * @return Value of the scalar
    */
-  T value(rmm::cuda_stream_view stream = cudf::default_stream_value) const;
+  T value(rmm::cuda_stream_view stream = cudf::get_default_stream()) const;
 
   /**
    * @brief Returns a raw pointer to the value in device memory.
@@ -215,7 +215,7 @@ class fixed_width_scalar : public scalar {
    */
   fixed_width_scalar(T value,
                      bool is_valid                       = true,
-                     rmm::cuda_stream_view stream        = cudf::default_stream_value,
+                     rmm::cuda_stream_view stream        = cudf::get_default_stream(),
                      rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
   /**
@@ -228,7 +228,7 @@ class fixed_width_scalar : public scalar {
    */
   fixed_width_scalar(rmm::device_scalar<T>&& data,
                      bool is_valid                       = true,
-                     rmm::cuda_stream_view stream        = cudf::default_stream_value,
+                     rmm::cuda_stream_view stream        = cudf::get_default_stream(),
                      rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 };
 
@@ -264,7 +264,7 @@ class numeric_scalar : public detail::fixed_width_scalar<T> {
    * @param mr Device memory resource to use for device memory allocation.
    */
   numeric_scalar(numeric_scalar const& other,
-                 rmm::cuda_stream_view stream        = cudf::default_stream_value,
+                 rmm::cuda_stream_view stream        = cudf::get_default_stream(),
                  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
   /**
@@ -277,7 +277,7 @@ class numeric_scalar : public detail::fixed_width_scalar<T> {
    */
   numeric_scalar(T value,
                  bool is_valid                       = true,
-                 rmm::cuda_stream_view stream        = cudf::default_stream_value,
+                 rmm::cuda_stream_view stream        = cudf::get_default_stream(),
                  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
   /**
@@ -290,7 +290,7 @@ class numeric_scalar : public detail::fixed_width_scalar<T> {
    */
   numeric_scalar(rmm::device_scalar<T>&& data,
                  bool is_valid                       = true,
-                 rmm::cuda_stream_view stream        = cudf::default_stream_value,
+                 rmm::cuda_stream_view stream        = cudf::get_default_stream(),
                  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 };
 
@@ -327,7 +327,7 @@ class fixed_point_scalar : public scalar {
    * @param mr Device memory resource to use for device memory allocation.
    */
   fixed_point_scalar(fixed_point_scalar const& other,
-                     rmm::cuda_stream_view stream        = cudf::default_stream_value,
+                     rmm::cuda_stream_view stream        = cudf::get_default_stream(),
                      rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
   /**
@@ -342,7 +342,7 @@ class fixed_point_scalar : public scalar {
   fixed_point_scalar(rep_type value,
                      numeric::scale_type scale,
                      bool is_valid                       = true,
-                     rmm::cuda_stream_view stream        = cudf::default_stream_value,
+                     rmm::cuda_stream_view stream        = cudf::get_default_stream(),
                      rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
   /**
@@ -355,7 +355,7 @@ class fixed_point_scalar : public scalar {
    */
   fixed_point_scalar(rep_type value,
                      bool is_valid                       = true,
-                     rmm::cuda_stream_view stream        = cudf::default_stream_value,
+                     rmm::cuda_stream_view stream        = cudf::get_default_stream(),
                      rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
   /**
@@ -368,7 +368,7 @@ class fixed_point_scalar : public scalar {
    */
   fixed_point_scalar(T value,
                      bool is_valid                       = true,
-                     rmm::cuda_stream_view stream        = cudf::default_stream_value,
+                     rmm::cuda_stream_view stream        = cudf::get_default_stream(),
                      rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
   /**
@@ -383,7 +383,7 @@ class fixed_point_scalar : public scalar {
   fixed_point_scalar(rmm::device_scalar<rep_type>&& data,
                      numeric::scale_type scale,
                      bool is_valid                       = true,
-                     rmm::cuda_stream_view stream        = cudf::default_stream_value,
+                     rmm::cuda_stream_view stream        = cudf::get_default_stream(),
                      rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
   /**
@@ -392,7 +392,7 @@ class fixed_point_scalar : public scalar {
    * @param stream CUDA stream used for device memory operations.
    * @return The value of the scalar
    */
-  rep_type value(rmm::cuda_stream_view stream = cudf::default_stream_value) const;
+  rep_type value(rmm::cuda_stream_view stream = cudf::get_default_stream()) const;
 
   /**
    * @brief Get the decimal32, decimal64 or decimal128.
@@ -400,7 +400,7 @@ class fixed_point_scalar : public scalar {
    * @param stream CUDA stream used for device memory operations.
    * @return The decimal32, decimal64 or decimal128 value
    */
-  T fixed_point_value(rmm::cuda_stream_view stream = cudf::default_stream_value) const;
+  T fixed_point_value(rmm::cuda_stream_view stream = cudf::get_default_stream()) const;
 
   /**
    * @brief Explicit conversion operator to get the value of the scalar on the host.
@@ -451,7 +451,7 @@ class string_scalar : public scalar {
    * @param mr Device memory resource to use for device memory allocation.
    */
   string_scalar(string_scalar const& other,
-                rmm::cuda_stream_view stream        = cudf::default_stream_value,
+                rmm::cuda_stream_view stream        = cudf::get_default_stream(),
                 rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
   /**
@@ -464,7 +464,7 @@ class string_scalar : public scalar {
    */
   string_scalar(std::string const& string,
                 bool is_valid                       = true,
-                rmm::cuda_stream_view stream        = cudf::default_stream_value,
+                rmm::cuda_stream_view stream        = cudf::get_default_stream(),
                 rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
   /**
@@ -479,7 +479,7 @@ class string_scalar : public scalar {
    */
   string_scalar(value_type const& source,
                 bool is_valid                       = true,
-                rmm::cuda_stream_view stream        = cudf::default_stream_value,
+                rmm::cuda_stream_view stream        = cudf::get_default_stream(),
                 rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
   /**
@@ -494,7 +494,7 @@ class string_scalar : public scalar {
    */
   string_scalar(rmm::device_scalar<value_type>& data,
                 bool is_valid                       = true,
-                rmm::cuda_stream_view stream        = cudf::default_stream_value,
+                rmm::cuda_stream_view stream        = cudf::get_default_stream(),
                 rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
   /**
@@ -510,7 +510,7 @@ class string_scalar : public scalar {
    */
   string_scalar(rmm::device_buffer&& data,
                 bool is_valid                       = true,
-                rmm::cuda_stream_view stream        = cudf::default_stream_value,
+                rmm::cuda_stream_view stream        = cudf::get_default_stream(),
                 rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
   /**
@@ -525,7 +525,7 @@ class string_scalar : public scalar {
    * @return The value of the scalar in a host std::string
    */
   [[nodiscard]] std::string to_string(
-    rmm::cuda_stream_view stream = cudf::default_stream_value) const;
+    rmm::cuda_stream_view stream = cudf::get_default_stream()) const;
 
   /**
    * @brief Get the value of the scalar as a string_view.
@@ -533,7 +533,7 @@ class string_scalar : public scalar {
    * @param stream CUDA stream used for device memory operations.
    * @return The value of the scalar as a string_view
    */
-  [[nodiscard]] value_type value(rmm::cuda_stream_view stream = cudf::default_stream_value) const;
+  [[nodiscard]] value_type value(rmm::cuda_stream_view stream = cudf::get_default_stream()) const;
 
   /**
    * @brief Returns the size of the string in bytes.
@@ -582,7 +582,7 @@ class chrono_scalar : public detail::fixed_width_scalar<T> {
    * @param mr Device memory resource to use for device memory allocation.
    */
   chrono_scalar(chrono_scalar const& other,
-                rmm::cuda_stream_view stream        = cudf::default_stream_value,
+                rmm::cuda_stream_view stream        = cudf::get_default_stream(),
                 rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
   /**
@@ -595,7 +595,7 @@ class chrono_scalar : public detail::fixed_width_scalar<T> {
    */
   chrono_scalar(T value,
                 bool is_valid                       = true,
-                rmm::cuda_stream_view stream        = cudf::default_stream_value,
+                rmm::cuda_stream_view stream        = cudf::get_default_stream(),
                 rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
   /**
@@ -608,7 +608,7 @@ class chrono_scalar : public detail::fixed_width_scalar<T> {
    */
   chrono_scalar(rmm::device_scalar<T>&& data,
                 bool is_valid                       = true,
-                rmm::cuda_stream_view stream        = cudf::default_stream_value,
+                rmm::cuda_stream_view stream        = cudf::get_default_stream(),
                 rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 };
 
@@ -641,7 +641,7 @@ class timestamp_scalar : public chrono_scalar<T> {
    * @param mr Device memory resource to use for device memory allocation.
    */
   timestamp_scalar(timestamp_scalar const& other,
-                   rmm::cuda_stream_view stream        = cudf::default_stream_value,
+                   rmm::cuda_stream_view stream        = cudf::get_default_stream(),
                    rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
   /**
@@ -657,7 +657,7 @@ class timestamp_scalar : public chrono_scalar<T> {
   template <typename Duration2>
   timestamp_scalar(Duration2 const& value,
                    bool is_valid,
-                   rmm::cuda_stream_view stream        = cudf::default_stream_value,
+                   rmm::cuda_stream_view stream        = cudf::get_default_stream(),
                    rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
   /**
@@ -696,7 +696,7 @@ class duration_scalar : public chrono_scalar<T> {
    * @param mr Device memory resource to use for device memory allocation.
    */
   duration_scalar(duration_scalar const& other,
-                  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+                  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
                   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
   /**
@@ -709,7 +709,7 @@ class duration_scalar : public chrono_scalar<T> {
    */
   duration_scalar(rep_type value,
                   bool is_valid,
-                  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+                  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
                   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
   /**
@@ -744,7 +744,7 @@ class list_scalar : public scalar {
    * @param mr Device memory resource to use for device memory allocation.
    */
   list_scalar(list_scalar const& other,
-              rmm::cuda_stream_view stream        = cudf::default_stream_value,
+              rmm::cuda_stream_view stream        = cudf::get_default_stream(),
               rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
   /**
@@ -759,7 +759,7 @@ class list_scalar : public scalar {
    */
   list_scalar(cudf::column_view const& data,
               bool is_valid                       = true,
-              rmm::cuda_stream_view stream        = cudf::default_stream_value,
+              rmm::cuda_stream_view stream        = cudf::get_default_stream(),
               rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
   /**
@@ -772,7 +772,7 @@ class list_scalar : public scalar {
    */
   list_scalar(cudf::column&& data,
               bool is_valid                       = true,
-              rmm::cuda_stream_view stream        = cudf::default_stream_value,
+              rmm::cuda_stream_view stream        = cudf::get_default_stream(),
               rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
   /**
@@ -809,7 +809,7 @@ class struct_scalar : public scalar {
    * @param mr Device memory resource to use for device memory allocation.
    */
   struct_scalar(struct_scalar const& other,
-                rmm::cuda_stream_view stream        = cudf::default_stream_value,
+                rmm::cuda_stream_view stream        = cudf::get_default_stream(),
                 rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
   /**
@@ -824,7 +824,7 @@ class struct_scalar : public scalar {
    */
   struct_scalar(table_view const& data,
                 bool is_valid                       = true,
-                rmm::cuda_stream_view stream        = cudf::default_stream_value,
+                rmm::cuda_stream_view stream        = cudf::get_default_stream(),
                 rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
   /**
@@ -839,7 +839,7 @@ class struct_scalar : public scalar {
    */
   struct_scalar(host_span<column_view const> data,
                 bool is_valid                       = true,
-                rmm::cuda_stream_view stream        = cudf::default_stream_value,
+                rmm::cuda_stream_view stream        = cudf::get_default_stream(),
                 rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
   /**
@@ -855,7 +855,7 @@ class struct_scalar : public scalar {
    */
   struct_scalar(table&& data,
                 bool is_valid                       = true,
-                rmm::cuda_stream_view stream        = cudf::default_stream_value,
+                rmm::cuda_stream_view stream        = cudf::get_default_stream(),
                 rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
   /**
diff --git a/cpp/include/cudf/scalar/scalar_factories.hpp b/cpp/include/cudf/scalar/scalar_factories.hpp
index b2b52ddc488..78b6c4fd0e9 100644
--- a/cpp/include/cudf/scalar/scalar_factories.hpp
+++ b/cpp/include/cudf/scalar/scalar_factories.hpp
@@ -43,7 +43,7 @@ namespace cudf {
  */
 std::unique_ptr<scalar> make_numeric_scalar(
   data_type type,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -60,7 +60,7 @@ std::unique_ptr<scalar> make_numeric_scalar(
  */
 std::unique_ptr<scalar> make_timestamp_scalar(
   data_type type,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -77,7 +77,7 @@ std::unique_ptr<scalar> make_timestamp_scalar(
  */
 std::unique_ptr<scalar> make_duration_scalar(
   data_type type,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -94,7 +94,7 @@ std::unique_ptr<scalar> make_duration_scalar(
  */
 std::unique_ptr<scalar> make_fixed_width_scalar(
   data_type type,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -111,7 +111,7 @@ std::unique_ptr<scalar> make_fixed_width_scalar(
  */
 std::unique_ptr<scalar> make_string_scalar(
   std::string const& string,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -126,7 +126,7 @@ std::unique_ptr<scalar> make_string_scalar(
  */
 std::unique_ptr<scalar> make_default_constructed_scalar(
   data_type type,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -141,7 +141,7 @@ std::unique_ptr<scalar> make_default_constructed_scalar(
  */
 std::unique_ptr<scalar> make_empty_scalar_like(
   column_view const& input,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -156,7 +156,7 @@ std::unique_ptr<scalar> make_empty_scalar_like(
 template <typename T>
 std::unique_ptr<scalar> make_fixed_width_scalar(
   T value,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   return std::make_unique<scalar_type_t<T>>(value, true, stream, mr);
@@ -176,7 +176,7 @@ template <typename T>
 std::unique_ptr<scalar> make_fixed_point_scalar(
   typename T::rep value,
   numeric::scale_type scale,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   return std::make_unique<scalar_type_t<T>>(value, scale, true, stream, mr);
@@ -192,7 +192,7 @@ std::unique_ptr<scalar> make_fixed_point_scalar(
  */
 std::unique_ptr<scalar> make_list_scalar(
   column_view elements,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -207,7 +207,7 @@ std::unique_ptr<scalar> make_list_scalar(
  */
 std::unique_ptr<scalar> make_struct_scalar(
   table_view const& data,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -222,7 +222,7 @@ std::unique_ptr<scalar> make_struct_scalar(
  */
 std::unique_ptr<scalar> make_struct_scalar(
   host_span<column_view const> data,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /** @} */  // end of group
diff --git a/cpp/include/cudf/strings/detail/combine.hpp b/cpp/include/cudf/strings/detail/combine.hpp
index 7df3a4ce324..3de97ed69f1 100644
--- a/cpp/include/cudf/strings/detail/combine.hpp
+++ b/cpp/include/cudf/strings/detail/combine.hpp
@@ -39,7 +39,7 @@ std::unique_ptr<column> concatenate(
   string_scalar const& separator,
   string_scalar const& narep,
   separator_on_nulls separate_nulls   = separator_on_nulls::YES,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -52,7 +52,7 @@ std::unique_ptr<column> join_strings(
   strings_column_view const& strings,
   string_scalar const& separator,
   string_scalar const& narep,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
diff --git a/cpp/include/cudf/strings/detail/concatenate.hpp b/cpp/include/cudf/strings/detail/concatenate.hpp
index 0df86db60b6..76397c15dad 100644
--- a/cpp/include/cudf/strings/detail/concatenate.hpp
+++ b/cpp/include/cudf/strings/detail/concatenate.hpp
@@ -44,7 +44,7 @@ namespace detail {
  */
 std::unique_ptr<column> concatenate(
   host_span<column_view const> columns,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/strings/detail/copying.hpp b/cpp/include/cudf/strings/detail/copying.hpp
index 56e9c35c889..e4ae9917f58 100644
--- a/cpp/include/cudf/strings/detail/copying.hpp
+++ b/cpp/include/cudf/strings/detail/copying.hpp
@@ -53,7 +53,7 @@ std::unique_ptr<cudf::column> copy_slice(
   strings_column_view const& strings,
   size_type start,
   size_type end                       = -1,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
diff --git a/cpp/include/cudf/strings/detail/fill.hpp b/cpp/include/cudf/strings/detail/fill.hpp
index e8f9c9ca438..e6a2fa8ba4e 100644
--- a/cpp/include/cudf/strings/detail/fill.hpp
+++ b/cpp/include/cudf/strings/detail/fill.hpp
@@ -47,7 +47,7 @@ std::unique_ptr<column> fill(
   size_type begin,
   size_type end,
   string_scalar const& value,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/strings/detail/json.hpp b/cpp/include/cudf/strings/detail/json.hpp
index 90188910c7d..87a1040b67d 100644
--- a/cpp/include/cudf/strings/detail/json.hpp
+++ b/cpp/include/cudf/strings/detail/json.hpp
@@ -34,7 +34,7 @@ std::unique_ptr<cudf::column> get_json_object(
   cudf::strings_column_view const& col,
   cudf::string_scalar const& json_path,
   get_json_object_options options,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/strings/detail/replace.hpp b/cpp/include/cudf/strings/detail/replace.hpp
index ce1d5e8a925..814188d88c9 100644
--- a/cpp/include/cudf/strings/detail/replace.hpp
+++ b/cpp/include/cudf/strings/detail/replace.hpp
@@ -48,7 +48,7 @@ std::unique_ptr<column> replace(
   string_scalar const& target,
   string_scalar const& repl,
   int32_t maxrepl                     = -1,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -62,7 +62,7 @@ std::unique_ptr<column> replace_slice(
   string_scalar const& repl           = string_scalar(""),
   size_type start                     = 0,
   size_type stop                      = -1,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -75,7 +75,7 @@ std::unique_ptr<column> replace(
   strings_column_view const& strings,
   strings_column_view const& targets,
   strings_column_view const& repls,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -99,7 +99,7 @@ std::unique_ptr<column> replace(
 std::unique_ptr<column> replace_nulls(
   strings_column_view const& strings,
   string_scalar const& repl           = string_scalar(""),
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/strings/detail/scatter.cuh b/cpp/include/cudf/strings/detail/scatter.cuh
index d430f390f10..10641677ea2 100644
--- a/cpp/include/cudf/strings/detail/scatter.cuh
+++ b/cpp/include/cudf/strings/detail/scatter.cuh
@@ -62,7 +62,7 @@ std::unique_ptr<column> scatter(
   SourceIterator end,
   MapIterator scatter_map,
   strings_column_view const& target,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   if (target.is_empty()) return make_empty_column(type_id::STRING);
diff --git a/cpp/include/cudf/strings/detail/utilities.cuh b/cpp/include/cudf/strings/detail/utilities.cuh
index 592f2128d0e..4eca9a5a55e 100644
--- a/cpp/include/cudf/strings/detail/utilities.cuh
+++ b/cpp/include/cudf/strings/detail/utilities.cuh
@@ -53,7 +53,7 @@ template <typename InputIterator>
 std::unique_ptr<column> make_offsets_child_column(
   InputIterator begin,
   InputIterator end,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   CUDF_EXPECTS(begin < end, "Invalid iterator range");
@@ -121,7 +121,7 @@ auto make_strings_children(
   SizeAndExecuteFunction size_and_exec_fn,
   size_type exec_size,
   size_type strings_count,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   auto offsets_column = make_numeric_column(
@@ -178,7 +178,7 @@ template <typename SizeAndExecuteFunction>
 auto make_strings_children(
   SizeAndExecuteFunction size_and_exec_fn,
   size_type strings_count,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   return make_strings_children(size_and_exec_fn, strings_count, strings_count, stream, mr);
diff --git a/cpp/include/cudf/strings/detail/utilities.hpp b/cpp/include/cudf/strings/detail/utilities.hpp
index ceae93dfe84..f87932b4608 100644
--- a/cpp/include/cudf/strings/detail/utilities.hpp
+++ b/cpp/include/cudf/strings/detail/utilities.hpp
@@ -38,7 +38,7 @@ namespace detail {
  */
 std::unique_ptr<column> create_chars_child_column(
   size_type bytes,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -51,7 +51,7 @@ std::unique_ptr<column> create_chars_child_column(
  */
 rmm::device_uvector<string_view> create_string_vector_from_column(
   cudf::strings_column_view const strings,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/table/experimental/row_operators.cuh b/cpp/include/cudf/table/experimental/row_operators.cuh
index af7091fc00c..e7b0c6eb6b6 100644
--- a/cpp/include/cudf/table/experimental/row_operators.cuh
+++ b/cpp/include/cudf/table/experimental/row_operators.cuh
@@ -820,7 +820,7 @@ class self_comparator {
   self_comparator(table_view const& t,
                   host_span<order const> column_order         = {},
                   host_span<null_order const> null_precedence = {},
-                  rmm::cuda_stream_view stream                = cudf::default_stream_value)
+                  rmm::cuda_stream_view stream                = cudf::get_default_stream())
     : d_t{preprocessed_table::create(t, column_order, null_precedence, stream)}
   {
   }
@@ -962,7 +962,7 @@ class two_table_comparator {
                        table_view const& right,
                        host_span<order const> column_order         = {},
                        host_span<null_order const> null_precedence = {},
-                       rmm::cuda_stream_view stream                = cudf::default_stream_value);
+                       rmm::cuda_stream_view stream                = cudf::get_default_stream());
 
   /**
    * @brief Construct an owning object for performing a lexicographic comparison between two rows of
diff --git a/cpp/include/cudf/table/table.hpp b/cpp/include/cudf/table/table.hpp
index 3b803c2b949..6d11ed0bfad 100644
--- a/cpp/include/cudf/table/table.hpp
+++ b/cpp/include/cudf/table/table.hpp
@@ -69,7 +69,7 @@ class table {
    * @param mr Device memory resource used for allocating the device memory for the new columns
    */
   table(table_view view,
-        rmm::cuda_stream_view stream        = cudf::default_stream_value,
+        rmm::cuda_stream_view stream        = cudf::get_default_stream(),
         rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
   /**
diff --git a/cpp/include/cudf/table/table_device_view.cuh b/cpp/include/cudf/table/table_device_view.cuh
index 9f6930b57f5..511013b585d 100644
--- a/cpp/include/cudf/table/table_device_view.cuh
+++ b/cpp/include/cudf/table/table_device_view.cuh
@@ -175,7 +175,7 @@ class table_device_view : public detail::table_device_view_base<column_device_vi
    * available in device memory
    */
   static auto create(table_view source_view,
-                     rmm::cuda_stream_view stream = cudf::default_stream_value)
+                     rmm::cuda_stream_view stream = cudf::get_default_stream())
   {
     auto deleter = [](table_device_view* t) { t->destroy(); };
     return std::unique_ptr<table_device_view, decltype(deleter)>{
@@ -212,7 +212,7 @@ class mutable_table_device_view
    * available in device memory
    */
   static auto create(mutable_table_view source_view,
-                     rmm::cuda_stream_view stream = cudf::default_stream_value)
+                     rmm::cuda_stream_view stream = cudf::get_default_stream())
   {
     auto deleter = [](mutable_table_device_view* t) { t->destroy(); };
     return std::unique_ptr<mutable_table_device_view, decltype(deleter)>{
diff --git a/cpp/include/cudf/utilities/default_stream.hpp b/cpp/include/cudf/utilities/default_stream.hpp
index 94bc01787e3..1eec3b994d0 100644
--- a/cpp/include/cudf/utilities/default_stream.hpp
+++ b/cpp/include/cudf/utilities/default_stream.hpp
@@ -16,21 +16,19 @@
 
 #pragma once
 
+#include <cudf/detail/utilities/default_stream.hpp>
+
+#include <rmm/cuda_stream.hpp>
 #include <rmm/cuda_stream_view.hpp>
 
 namespace cudf {
 
 /**
- * @brief Default stream for cudf
+ * @brief Get the current default stream
  *
- * Use this value to ensure the correct stream is used when compiled with per
- * thread default stream.
+ * @return The current default stream.
  */
-#if defined(CUDF_USE_PER_THREAD_DEFAULT_STREAM)
-static const rmm::cuda_stream_view default_stream_value{rmm::cuda_stream_per_thread};
-#else
-static constexpr rmm::cuda_stream_view default_stream_value{};
-#endif
+rmm::cuda_stream_view const get_default_stream();
 
 /**
  * @brief Check if per-thread default stream is enabled.
diff --git a/cpp/include/cudf_test/base_fixture.hpp b/cpp/include/cudf_test/base_fixture.hpp
index e529785a758..be4d5bccd7b 100644
--- a/cpp/include/cudf_test/base_fixture.hpp
+++ b/cpp/include/cudf_test/base_fixture.hpp
@@ -18,12 +18,15 @@
 
 #include <random>
 
+#include <cudf/utilities/default_stream.hpp>
 #include <cudf/utilities/error.hpp>
 #include <cudf/utilities/traits.hpp>
 #include <cudf_test/cudf_gtest.hpp>
 #include <cudf_test/cxxopts.hpp>
 #include <cudf_test/file_utilities.hpp>
+#include <cudf_test/stream_checking_resource_adapter.hpp>
 
+#include <rmm/cuda_stream_view.hpp>
 #include <rmm/mr/device/arena_memory_resource.hpp>
 #include <rmm/mr/device/binning_memory_resource.hpp>
 #include <rmm/mr/device/cuda_async_memory_resource.hpp>
@@ -303,11 +306,18 @@ inline auto parse_cudf_test_opts(int argc, char** argv)
   try {
     cxxopts::Options options(argv[0], " - cuDF tests command line options");
     const char* env_rmm_mode = std::getenv("GTEST_CUDF_RMM_MODE");  // Overridden by CLI options
+    const char* env_stream_mode =
+      std::getenv("GTEST_CUDF_STREAM_MODE");  // Overridden by CLI options
     auto default_rmm_mode    = env_rmm_mode ? env_rmm_mode : "pool";
+    auto default_stream_mode = env_stream_mode ? env_stream_mode : "default";
     options.allow_unrecognised_options().add_options()(
       "rmm_mode",
       "RMM allocation mode",
       cxxopts::value<std::string>()->default_value(default_rmm_mode));
+    options.allow_unrecognised_options().add_options()(
+      "stream_mode",
+      "Whether to use a non-default stream",
+      cxxopts::value<std::string>()->default_value(default_stream_mode));
     return options.parse(argc, argv);
   } catch (const cxxopts::OptionException& e) {
     CUDF_FAIL("Error parsing command line options");
@@ -324,13 +334,21 @@ inline auto parse_cudf_test_opts(int argc, char** argv)
  * function parses the command line to customize test behavior, like the
  * allocation mode used for creating the default memory resource.
  */
-#define CUDF_TEST_PROGRAM_MAIN()                                        \
-  int main(int argc, char** argv)                                       \
-  {                                                                     \
-    ::testing::InitGoogleTest(&argc, argv);                             \
-    auto const cmd_opts = parse_cudf_test_opts(argc, argv);             \
-    auto const rmm_mode = cmd_opts["rmm_mode"].as<std::string>();       \
-    auto resource       = cudf::test::create_memory_resource(rmm_mode); \
-    rmm::mr::set_current_device_resource(resource.get());               \
-    return RUN_ALL_TESTS();                                             \
+#define CUDF_TEST_PROGRAM_MAIN()                                            \
+  int main(int argc, char** argv)                                           \
+  {                                                                         \
+    ::testing::InitGoogleTest(&argc, argv);                                 \
+    auto const cmd_opts = parse_cudf_test_opts(argc, argv);                 \
+    auto const rmm_mode = cmd_opts["rmm_mode"].as<std::string>();           \
+    auto resource       = cudf::test::create_memory_resource(rmm_mode);     \
+    rmm::mr::set_current_device_resource(resource.get());                   \
+    /* In "custom" mode the adaptor must stay alive while the tests run. */ \
+    auto const stream_mode = cmd_opts["stream_mode"].as<std::string>();     \
+    rmm::cuda_stream const new_default_stream{};                            \
+    if (stream_mode == "custom") {                                          \
+      auto adapter = make_stream_checking_resource_adaptor(resource.get()); \
+      rmm::mr::set_current_device_resource(&adapter);                       \
+      return RUN_ALL_TESTS();                                               \
+    }                                                                       \
+    return RUN_ALL_TESTS();                                                 \
+  }
diff --git a/cpp/include/cudf_test/column_utilities.hpp b/cpp/include/cudf_test/column_utilities.hpp
index b7d890fb315..2cc90743912 100644
--- a/cpp/include/cudf_test/column_utilities.hpp
+++ b/cpp/include/cudf_test/column_utilities.hpp
@@ -241,11 +241,11 @@ inline std::pair<thrust::host_vector<std::string>, std::vector<bitmask_type>> to
   auto const scv     = strings_column_view(c);
   auto const h_chars = cudf::detail::make_std_vector_sync<char>(
     cudf::device_span<char const>(scv.chars().data<char>(), scv.chars().size()),
-    cudf::default_stream_value);
+    cudf::get_default_stream());
   auto const h_offsets = cudf::detail::make_std_vector_sync(
     cudf::device_span<cudf::offset_type const>(
       scv.offsets().data<cudf::offset_type>() + scv.offset(), scv.size() + 1),
-    cudf::default_stream_value);
+    cudf::get_default_stream());
 
   // build std::string vector from chars and offsets
   std::vector<std::string> host_data;
diff --git a/cpp/include/cudf_test/column_wrapper.hpp b/cpp/include/cudf_test/column_wrapper.hpp
index 8827372b3fd..f9f571c252a 100644
--- a/cpp/include/cudf_test/column_wrapper.hpp
+++ b/cpp/include/cudf_test/column_wrapper.hpp
@@ -170,7 +170,7 @@ rmm::device_buffer make_elements(InputIterator begin, InputIterator end)
   auto transform_begin = thrust::make_transform_iterator(begin, transformer);
   auto const size      = cudf::distance(begin, end);
   auto const elements  = thrust::host_vector<ElementTo>(transform_begin, transform_begin + size);
-  return rmm::device_buffer{elements.data(), size * sizeof(ElementTo), cudf::default_stream_value};
+  return rmm::device_buffer{elements.data(), size * sizeof(ElementTo), cudf::get_default_stream()};
 }
 
 /**
@@ -196,7 +196,7 @@ rmm::device_buffer make_elements(InputIterator begin, InputIterator end)
   auto transform_begin = thrust::make_transform_iterator(begin, transformer);
   auto const size      = cudf::distance(begin, end);
   auto const elements  = thrust::host_vector<RepType>(transform_begin, transform_begin + size);
-  return rmm::device_buffer{elements.data(), size * sizeof(RepType), cudf::default_stream_value};
+  return rmm::device_buffer{elements.data(), size * sizeof(RepType), cudf::get_default_stream()};
 }
 
 /**
@@ -223,7 +223,7 @@ rmm::device_buffer make_elements(InputIterator begin, InputIterator end)
   auto transformer_begin = thrust::make_transform_iterator(begin, to_rep);
   auto const size        = cudf::distance(begin, end);
   auto const elements = thrust::host_vector<RepType>(transformer_begin, transformer_begin + size);
-  return rmm::device_buffer{elements.data(), size * sizeof(RepType), cudf::default_stream_value};
+  return rmm::device_buffer{elements.data(), size * sizeof(RepType), cudf::get_default_stream()};
 }
 
 /**
@@ -271,7 +271,7 @@ rmm::device_buffer make_null_mask(ValidityIterator begin, ValidityIterator end)
   auto null_mask = make_null_mask_vector(begin, end);
   return rmm::device_buffer{null_mask.data(),
                             null_mask.size() * sizeof(decltype(null_mask.front())),
-                            cudf::default_stream_value};
+                            cudf::get_default_stream()};
 }
 
 /**
@@ -547,7 +547,7 @@ class fixed_point_column_wrapper : public detail::column_wrapper {
     wrapped.reset(new cudf::column{
       data_type,
       size,
-      rmm::device_buffer{elements.data(), size * sizeof(Rep), cudf::default_stream_value}});
+      rmm::device_buffer{elements.data(), size * sizeof(Rep), cudf::get_default_stream()}});
   }
 
   /**
@@ -611,7 +611,7 @@ class fixed_point_column_wrapper : public detail::column_wrapper {
     wrapped.reset(new cudf::column{
       data_type,
       size,
-      rmm::device_buffer{elements.data(), size * sizeof(Rep), cudf::default_stream_value},
+      rmm::device_buffer{elements.data(), size * sizeof(Rep), cudf::get_default_stream()},
       detail::make_null_mask(v, v + size),
       cudf::UNKNOWN_NULL_COUNT});
   }
diff --git a/cpp/include/cudf_test/stream_checking_resource_adapter.hpp b/cpp/include/cudf_test/stream_checking_resource_adapter.hpp
new file mode 100644
index 00000000000..4a22ff148ae
--- /dev/null
+++ b/cpp/include/cudf_test/stream_checking_resource_adapter.hpp
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <rmm/mr/device/device_memory_resource.hpp>
+
+/**
+ * @brief Resource that verifies that the default stream is not used in any allocation.
+ *
+ * @tparam Upstream Type of the upstream resource used for
+ * allocation/deallocation.
+ */
+template <typename Upstream>
+class stream_checking_resource_adaptor final : public rmm::mr::device_memory_resource {
+ public:
+  /**
+   * @brief Construct a new adaptor.
+   *
+   * @throws `cudf::logic_error` if `upstream == nullptr`
+   *
+   * @param upstream The resource used for allocating/deallocating device memory
+   */
+  stream_checking_resource_adaptor(Upstream* upstream) : upstream_{upstream}
+  {
+    CUDF_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer.");
+  }
+
+  stream_checking_resource_adaptor()                                        = delete;
+  ~stream_checking_resource_adaptor() override                              = default;
+  stream_checking_resource_adaptor(stream_checking_resource_adaptor const&) = delete;
+  stream_checking_resource_adaptor& operator=(stream_checking_resource_adaptor const&) = delete;
+  stream_checking_resource_adaptor(stream_checking_resource_adaptor&&) noexcept        = default;
+  stream_checking_resource_adaptor& operator=(stream_checking_resource_adaptor&&) noexcept =
+    default;
+
+  /**
+   * @brief Return pointer to the upstream resource.
+   *
+   * @return Pointer to the upstream resource.
+   */
+  Upstream* get_upstream() const noexcept { return upstream_; }
+
+  /**
+   * @brief Checks whether the upstream resource supports streams.
+   *
+   * @return Whether or not the upstream resource supports streams
+   */
+  bool supports_streams() const noexcept override { return upstream_->supports_streams(); }
+
+  /**
+   * @brief Query whether the resource supports the get_mem_info API.
+   *
+   * @return Whether or not the upstream resource supports get_mem_info
+   */
+  bool supports_get_mem_info() const noexcept override
+  {
+    return upstream_->supports_get_mem_info();
+  }
+
+ private:
+  /**
+   * @brief Allocates memory of size at least `bytes` using the upstream
+   * resource after verifying that `stream` is not a default stream.
+   *
+   * Alignment of the returned pointer is whatever the upstream resource provides.
+   *
+   * @throws `rmm::bad_alloc` if the requested allocation could not be fulfilled
+   * by the upstream resource.
+   * @throws `std::runtime_error` if attempted on a default stream
+   *
+   * @param bytes The size, in bytes, of the allocation
+   * @param stream Stream on which to perform the allocation
+   * @return Pointer to the newly allocated memory
+   */
+  void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override
+  {
+    verify_non_default_stream(stream);
+    return upstream_->allocate(bytes, stream);
+  }
+
+  /**
+   * @brief Free allocation of size `bytes` pointed to by `ptr`
+   *
+   * @throws `std::runtime_error` if attempted on a default stream
+   *
+   * @param ptr Pointer to be deallocated
+   * @param bytes Size of the allocation
+   * @param stream Stream on which to perform the deallocation
+   */
+  void do_deallocate(void* ptr, std::size_t bytes, rmm::cuda_stream_view stream) override
+  {
+    verify_non_default_stream(stream);
+    upstream_->deallocate(ptr, bytes, stream);
+  }
+
+  /**
+   * @brief Compare the upstream resource to another.
+   *
+   * @param other The other resource to compare to
+   * @return Whether or not the two resources are equivalent
+   */
+  bool do_is_equal(device_memory_resource const& other) const noexcept override
+  {
+    if (this == &other) { return true; }
+    auto cast = dynamic_cast<stream_checking_resource_adaptor<Upstream> const*>(&other);
+    return cast != nullptr ? upstream_->is_equal(*cast->get_upstream())
+                           : upstream_->is_equal(other);
+  }
+
+  /**
+   * @brief Get free and available memory from upstream resource.
+   *
+   * @throws `rmm::cuda_error` if unable to retrieve memory info.
+   * @throws `std::runtime_error` if attempted on a default stream
+   *
+   * @param stream Stream on which to get the mem info.
+   * @return std::pair with available and free memory for resource
+   */
+  std::pair<std::size_t, std::size_t> do_get_mem_info(rmm::cuda_stream_view stream) const override
+  {
+    verify_non_default_stream(stream);
+    return upstream_->get_mem_info(stream);
+  }
+
+  /**
+   * @brief Throw an error if given one of CUDA's default stream specifiers.
+   *
+   * @throws `std::runtime_error` if provided a default stream
+   */
+  void verify_non_default_stream(rmm::cuda_stream_view const stream) const
+  {
+    auto cstream{stream.value()};
+    if (cstream == cudaStreamDefault || (cstream == cudaStreamLegacy) ||
+        (cstream == cudaStreamPerThread)) {
+      throw std::runtime_error("Attempted to perform an operation on a default stream!");
+    }
+  }
+
+  Upstream* upstream_;  // the upstream resource used for satisfying allocation requests
+};
+
+/**
+ * @brief Convenience factory that wraps `upstream` in a `stream_checking_resource_adaptor`.
+ *
+ * @tparam Upstream Type of the upstream `device_memory_resource`.
+ * @param upstream Pointer to the upstream resource
+ * @return A `stream_checking_resource_adaptor` constructed around `upstream`
+ */
+template <typename Upstream>
+stream_checking_resource_adaptor<Upstream> make_stream_checking_resource_adaptor(Upstream* upstream)
+{
+  return stream_checking_resource_adaptor<Upstream>{upstream};
+}
diff --git a/cpp/include/cudf_test/tdigest_utilities.cuh b/cpp/include/cudf_test/tdigest_utilities.cuh
index 1a75016d78c..6e1982164e5 100644
--- a/cpp/include/cudf_test/tdigest_utilities.cuh
+++ b/cpp/include/cudf_test/tdigest_utilities.cuh
@@ -118,11 +118,11 @@ void tdigest_minmax_compare(cudf::tdigest::tdigest_column_view const& tdv,
   // verify min/max
   thrust::host_vector<device_span<T const>> h_spans;
   h_spans.push_back({input_values.begin<T>(), static_cast<size_t>(input_values.size())});
-  auto spans = cudf::detail::make_device_uvector_async(h_spans, cudf::default_stream_value);
+  auto spans = cudf::detail::make_device_uvector_async(h_spans, cudf::get_default_stream());
 
   auto expected_min = cudf::make_fixed_width_column(
     data_type{type_id::FLOAT64}, spans.size(), mask_state::UNALLOCATED);
-  thrust::transform(rmm::exec_policy(cudf::default_stream_value),
+  thrust::transform(rmm::exec_policy(cudf::get_default_stream()),
                     spans.begin(),
                     spans.end(),
                     expected_min->mutable_view().template begin<double>(),
@@ -132,7 +132,7 @@ void tdigest_minmax_compare(cudf::tdigest::tdigest_column_view const& tdv,
 
   auto expected_max = cudf::make_fixed_width_column(
     data_type{type_id::FLOAT64}, spans.size(), mask_state::UNALLOCATED);
-  thrust::transform(rmm::exec_policy(cudf::default_stream_value),
+  thrust::transform(rmm::exec_policy(cudf::get_default_stream()),
                     spans.begin(),
                     spans.end(),
                     expected_max->mutable_view().template begin<double>(),
diff --git a/cpp/include/nvtext/bpe_tokenize.hpp b/cpp/include/nvtext/bpe_tokenize.hpp
index 97e354cb39b..b93d93b07c6 100644
--- a/cpp/include/nvtext/bpe_tokenize.hpp
+++ b/cpp/include/nvtext/bpe_tokenize.hpp
@@ -46,7 +46,7 @@ struct bpe_merge_pairs {
    * @param mr Device memory resource used to allocate the device memory
    */
   bpe_merge_pairs(std::unique_ptr<cudf::column>&& input,
-                  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+                  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
                   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
   /**
@@ -57,7 +57,7 @@ struct bpe_merge_pairs {
    * @param mr Device memory resource used to allocate the device memory
    */
   bpe_merge_pairs(cudf::strings_column_view const& input,
-                  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+                  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
                   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
   ~bpe_merge_pairs();
diff --git a/cpp/include/nvtext/detail/tokenize.hpp b/cpp/include/nvtext/detail/tokenize.hpp
index 2b5d0bb855e..c06e6211654 100644
--- a/cpp/include/nvtext/detail/tokenize.hpp
+++ b/cpp/include/nvtext/detail/tokenize.hpp
@@ -38,7 +38,7 @@ namespace detail {
 std::unique_ptr<cudf::column> tokenize(
   cudf::strings_column_view const& strings,
   cudf::string_scalar const& delimiter = cudf::string_scalar{""},
-  rmm::cuda_stream_view stream         = cudf::default_stream_value,
+  rmm::cuda_stream_view stream         = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr  = rmm::mr::get_current_device_resource());
 
 /**
@@ -54,7 +54,7 @@ std::unique_ptr<cudf::column> tokenize(
 std::unique_ptr<cudf::column> tokenize(
   cudf::strings_column_view const& strings,
   cudf::strings_column_view const& delimiters,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -71,7 +71,7 @@ std::unique_ptr<cudf::column> tokenize(
 std::unique_ptr<cudf::column> count_tokens(
   cudf::strings_column_view const& strings,
   cudf::string_scalar const& delimiter = cudf::string_scalar{""},
-  rmm::cuda_stream_view stream         = cudf::default_stream_value,
+  rmm::cuda_stream_view stream         = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr  = rmm::mr::get_current_device_resource());
 
 /**
@@ -87,7 +87,7 @@ std::unique_ptr<cudf::column> count_tokens(
 std::unique_ptr<cudf::column> count_tokens(
   cudf::strings_column_view const& strings,
   cudf::strings_column_view const& delimiters,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/src/binaryop/binaryop.cpp b/cpp/src/binaryop/binaryop.cpp
index 4b79cc0581a..83ad8aa4cee 100644
--- a/cpp/src/binaryop/binaryop.cpp
+++ b/cpp/src/binaryop/binaryop.cpp
@@ -406,7 +406,7 @@ std::unique_ptr<column> binary_operation(scalar const& lhs,
                                          rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::binary_operation(lhs, rhs, op, output_type, cudf::default_stream_value, mr);
+  return detail::binary_operation(lhs, rhs, op, output_type, cudf::get_default_stream(), mr);
 }
 std::unique_ptr<column> binary_operation(column_view const& lhs,
                                          scalar const& rhs,
@@ -415,7 +415,7 @@ std::unique_ptr<column> binary_operation(column_view const& lhs,
                                          rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::binary_operation(lhs, rhs, op, output_type, cudf::default_stream_value, mr);
+  return detail::binary_operation(lhs, rhs, op, output_type, cudf::get_default_stream(), mr);
 }
 std::unique_ptr<column> binary_operation(column_view const& lhs,
                                          column_view const& rhs,
@@ -424,7 +424,7 @@ std::unique_ptr<column> binary_operation(column_view const& lhs,
                                          rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::binary_operation(lhs, rhs, op, output_type, cudf::default_stream_value, mr);
+  return detail::binary_operation(lhs, rhs, op, output_type, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> binary_operation(column_view const& lhs,
@@ -434,7 +434,7 @@ std::unique_ptr<column> binary_operation(column_view const& lhs,
                                          rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::binary_operation(lhs, rhs, ptx, output_type, cudf::default_stream_value, mr);
+  return detail::binary_operation(lhs, rhs, ptx, output_type, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/binaryop/compiled/binary_ops.hpp b/cpp/src/binaryop/compiled/binary_ops.hpp
index 1f711b7c899..c51993409ef 100644
--- a/cpp/src/binaryop/compiled/binary_ops.hpp
+++ b/cpp/src/binaryop/compiled/binary_ops.hpp
@@ -37,7 +37,7 @@ std::unique_ptr<column> string_null_min_max(
   column_view const& rhs,
   binary_operator op,
   data_type output_type,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 std::unique_ptr<column> string_null_min_max(
@@ -45,7 +45,7 @@ std::unique_ptr<column> string_null_min_max(
   scalar const& rhs,
   binary_operator op,
   data_type output_type,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 std::unique_ptr<column> string_null_min_max(
@@ -53,7 +53,7 @@ std::unique_ptr<column> string_null_min_max(
   column_view const& rhs,
   binary_operator op,
   data_type output_type,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -80,7 +80,7 @@ std::unique_ptr<column> binary_operation(
   column_view const& rhs,
   binary_operator op,
   data_type output_type,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -107,7 +107,7 @@ std::unique_ptr<column> binary_operation(
   scalar const& rhs,
   binary_operator op,
   data_type output_type,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -133,7 +133,7 @@ std::unique_ptr<column> binary_operation(
   column_view const& rhs,
   binary_operator op,
   data_type output_type,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 void binary_operation(mutable_column_view& out,
diff --git a/cpp/src/binaryop/compiled/struct_binary_ops.cuh b/cpp/src/binaryop/compiled/struct_binary_ops.cuh
index def9ebcef97..2fcf1ce4e32 100644
--- a/cpp/src/binaryop/compiled/struct_binary_ops.cuh
+++ b/cpp/src/binaryop/compiled/struct_binary_ops.cuh
@@ -71,7 +71,7 @@ void apply_struct_binary_op(mutable_column_view& out,
                             bool is_lhs_scalar,
                             bool is_rhs_scalar,
                             PhysicalElementComparator comparator = {},
-                            rmm::cuda_stream_view stream         = cudf::default_stream_value)
+                            rmm::cuda_stream_view stream         = cudf::get_default_stream())
 {
   auto const compare_orders = std::vector<order>(
     lhs.size(),
@@ -115,7 +115,7 @@ void apply_struct_equality_op(mutable_column_view& out,
                               bool is_rhs_scalar,
                               binary_operator op,
                               PhysicalEqualityComparator comparator = {},
-                              rmm::cuda_stream_view stream          = cudf::default_stream_value)
+                              rmm::cuda_stream_view stream          = cudf::get_default_stream())
 {
   CUDF_EXPECTS(op == binary_operator::EQUAL || op == binary_operator::NOT_EQUAL ||
                  op == binary_operator::NULL_EQUALS,
diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu
index 4c9151533c2..6fef15d58d7 100644
--- a/cpp/src/bitmask/null_mask.cu
+++ b/cpp/src/bitmask/null_mask.cu
@@ -158,7 +158,7 @@ rmm::device_buffer create_null_mask(size_type size,
                                     mask_state state,
                                     rmm::mr::device_memory_resource* mr)
 {
-  return detail::create_null_mask(size, state, cudf::default_stream_value, mr);
+  return detail::create_null_mask(size, state, cudf::get_default_stream(), mr);
 }
 
 // Set pre-allocated null mask of given bit range [begin_bit, end_bit) to valid, if valid==true,
@@ -510,25 +510,25 @@ rmm::device_buffer copy_bitmask(bitmask_type const* mask,
                                 size_type end_bit,
                                 rmm::mr::device_memory_resource* mr)
 {
-  return detail::copy_bitmask(mask, begin_bit, end_bit, cudf::default_stream_value, mr);
+  return detail::copy_bitmask(mask, begin_bit, end_bit, cudf::get_default_stream(), mr);
 }
 
 // Create a bitmask from a column view
 rmm::device_buffer copy_bitmask(column_view const& view, rmm::mr::device_memory_resource* mr)
 {
-  return detail::copy_bitmask(view, cudf::default_stream_value, mr);
+  return detail::copy_bitmask(view, cudf::get_default_stream(), mr);
 }
 
 std::pair<rmm::device_buffer, size_type> bitmask_and(table_view const& view,
                                                      rmm::mr::device_memory_resource* mr)
 {
-  return detail::bitmask_and(view, cudf::default_stream_value, mr);
+  return detail::bitmask_and(view, cudf::get_default_stream(), mr);
 }
 
 std::pair<rmm::device_buffer, size_type> bitmask_or(table_view const& view,
                                                     rmm::mr::device_memory_resource* mr)
 {
-  return detail::bitmask_or(view, cudf::default_stream_value, mr);
+  return detail::bitmask_or(view, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/column/column.cu b/cpp/src/column/column.cu
index 61dfea6c26a..7b862373a5b 100644
--- a/cpp/src/column/column.cu
+++ b/cpp/src/column/column.cu
@@ -144,7 +144,7 @@ size_type column::null_count() const
   CUDF_FUNC_RANGE();
   if (_null_count <= cudf::UNKNOWN_NULL_COUNT) {
     _null_count = cudf::detail::null_count(
-      static_cast<bitmask_type const*>(_null_mask.data()), 0, size(), cudf::default_stream_value);
+      static_cast<bitmask_type const*>(_null_mask.data()), 0, size(), cudf::get_default_stream());
   }
   return _null_count;
 }
@@ -182,7 +182,7 @@ void column::set_null_count(size_type new_null_count)
 namespace {
 struct create_column_from_view {
   cudf::column_view view;
-  rmm::cuda_stream_view stream{cudf::default_stream_value};
+  rmm::cuda_stream_view stream{cudf::get_default_stream()};
   rmm::mr::device_memory_resource* mr;
 
   template <typename ColumnType,
diff --git a/cpp/src/column/column_view.cpp b/cpp/src/column/column_view.cpp
index 2ff088a3f20..3e18b9734f6 100644
--- a/cpp/src/column/column_view.cpp
+++ b/cpp/src/column/column_view.cpp
@@ -68,7 +68,7 @@ size_type column_view_base::null_count() const
 {
   if (_null_count <= cudf::UNKNOWN_NULL_COUNT) {
     _null_count = cudf::detail::null_count(
-      null_mask(), offset(), offset() + size(), cudf::default_stream_value);
+      null_mask(), offset(), offset() + size(), cudf::get_default_stream());
   }
   return _null_count;
 }
@@ -79,7 +79,7 @@ size_type column_view_base::null_count(size_type begin, size_type end) const
   return (null_count() == 0)
            ? 0
            : cudf::detail::null_count(
-               null_mask(), offset() + begin, offset() + end, cudf::default_stream_value);
+               null_mask(), offset() + begin, offset() + end, cudf::get_default_stream());
 }
 
 // Struct to use custom hash combine and fold expression
diff --git a/cpp/src/copying/concatenate.cu b/cpp/src/copying/concatenate.cu
index b770eef1c3a..802b47e4664 100644
--- a/cpp/src/copying/concatenate.cu
+++ b/cpp/src/copying/concatenate.cu
@@ -557,7 +557,7 @@ rmm::device_buffer concatenate_masks(host_span<column_view const> views,
                                      rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::concatenate_masks(views, cudf::default_stream_value, mr);
+  return detail::concatenate_masks(views, cudf::get_default_stream(), mr);
 }
 
 // Concatenates the elements from a vector of column_views
@@ -565,14 +565,14 @@ std::unique_ptr<column> concatenate(host_span<column_view const> columns_to_conc
                                     rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::concatenate(columns_to_concat, cudf::default_stream_value, mr);
+  return detail::concatenate(columns_to_concat, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<table> concatenate(host_span<table_view const> tables_to_concat,
                                    rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::concatenate(tables_to_concat, cudf::default_stream_value, mr);
+  return detail::concatenate(tables_to_concat, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/copying/contiguous_split.cu b/cpp/src/copying/contiguous_split.cu
index 0c90eb539fc..c52ca1f74df 100644
--- a/cpp/src/copying/contiguous_split.cu
+++ b/cpp/src/copying/contiguous_split.cu
@@ -1269,7 +1269,7 @@ std::vector<packed_table> contiguous_split(cudf::table_view const& input,
                                            rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::contiguous_split(input, splits, cudf::default_stream_value, mr);
+  return detail::contiguous_split(input, splits, cudf::get_default_stream(), mr);
 }
 
 };  // namespace cudf
diff --git a/cpp/src/copying/copy.cpp b/cpp/src/copying/copy.cpp
index d9a16315488..00147277231 100644
--- a/cpp/src/copying/copy.cpp
+++ b/cpp/src/copying/copy.cpp
@@ -183,7 +183,7 @@ std::unique_ptr<column> allocate_like(column_view const& input,
                                       rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::allocate_like(input, input.size(), mask_alloc, cudf::default_stream_value, mr);
+  return detail::allocate_like(input, input.size(), mask_alloc, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> allocate_like(column_view const& input,
@@ -192,7 +192,7 @@ std::unique_ptr<column> allocate_like(column_view const& input,
                                       rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::allocate_like(input, size, mask_alloc, cudf::default_stream_value, mr);
+  return detail::allocate_like(input, size, mask_alloc, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/copying/copy.cu b/cpp/src/copying/copy.cu
index 7e5b9288628..0978cf441d8 100644
--- a/cpp/src/copying/copy.cu
+++ b/cpp/src/copying/copy.cu
@@ -410,7 +410,7 @@ std::unique_ptr<column> copy_if_else(column_view const& lhs,
                                      rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::copy_if_else(lhs, rhs, boolean_mask, cudf::default_stream_value, mr);
+  return detail::copy_if_else(lhs, rhs, boolean_mask, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> copy_if_else(scalar const& lhs,
@@ -419,7 +419,7 @@ std::unique_ptr<column> copy_if_else(scalar const& lhs,
                                      rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::copy_if_else(lhs, rhs, boolean_mask, cudf::default_stream_value, mr);
+  return detail::copy_if_else(lhs, rhs, boolean_mask, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> copy_if_else(column_view const& lhs,
@@ -428,7 +428,7 @@ std::unique_ptr<column> copy_if_else(column_view const& lhs,
                                      rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::copy_if_else(lhs, rhs, boolean_mask, cudf::default_stream_value, mr);
+  return detail::copy_if_else(lhs, rhs, boolean_mask, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> copy_if_else(scalar const& lhs,
@@ -437,7 +437,7 @@ std::unique_ptr<column> copy_if_else(scalar const& lhs,
                                      rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::copy_if_else(lhs, rhs, boolean_mask, cudf::default_stream_value, mr);
+  return detail::copy_if_else(lhs, rhs, boolean_mask, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/copying/copy_range.cu b/cpp/src/copying/copy_range.cu
index 080a8f645bd..c5fa3a73e1a 100644
--- a/cpp/src/copying/copy_range.cu
+++ b/cpp/src/copying/copy_range.cu
@@ -274,7 +274,7 @@ void copy_range_in_place(column_view const& source,
 {
   CUDF_FUNC_RANGE();
   return detail::copy_range_in_place(
-    source, target, source_begin, source_end, target_begin, cudf::default_stream_value);
+    source, target, source_begin, source_end, target_begin, cudf::get_default_stream());
 }
 
 std::unique_ptr<column> copy_range(column_view const& source,
@@ -286,7 +286,7 @@ std::unique_ptr<column> copy_range(column_view const& source,
 {
   CUDF_FUNC_RANGE();
   return detail::copy_range(
-    source, target, source_begin, source_end, target_begin, cudf::default_stream_value, mr);
+    source, target, source_begin, source_end, target_begin, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/copying/gather.cu b/cpp/src/copying/gather.cu
index d00d3a2a43e..93d05757722 100644
--- a/cpp/src/copying/gather.cu
+++ b/cpp/src/copying/gather.cu
@@ -85,7 +85,7 @@ std::unique_ptr<table> gather(table_view const& source_table,
                                                      : detail::negative_index_policy::ALLOWED;
 
   return detail::gather(
-    source_table, gather_map, bounds_policy, index_policy, cudf::default_stream_value, mr);
+    source_table, gather_map, bounds_policy, index_policy, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/copying/get_element.cu b/cpp/src/copying/get_element.cu
index f12b4639b25..5e76b4adbbe 100644
--- a/cpp/src/copying/get_element.cu
+++ b/cpp/src/copying/get_element.cu
@@ -210,7 +210,7 @@ std::unique_ptr<scalar> get_element(column_view const& input,
                                     rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::get_element(input, index, cudf::default_stream_value, mr);
+  return detail::get_element(input, index, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/copying/pack.cpp b/cpp/src/copying/pack.cpp
index 5bc425ab7f5..427f2dfdade 100644
--- a/cpp/src/copying/pack.cpp
+++ b/cpp/src/copying/pack.cpp
@@ -219,7 +219,7 @@ table_view unpack(uint8_t const* metadata, uint8_t const* gpu_data)
 packed_columns pack(cudf::table_view const& input, rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::pack(input, cudf::default_stream_value, mr);
+  return detail::pack(input, cudf::get_default_stream(), mr);
 }
 
 /**
diff --git a/cpp/src/copying/purge_nonempty_nulls.cu b/cpp/src/copying/purge_nonempty_nulls.cu
index 30de538ec7a..35eb13119f7 100644
--- a/cpp/src/copying/purge_nonempty_nulls.cu
+++ b/cpp/src/copying/purge_nonempty_nulls.cu
@@ -112,7 +112,7 @@ bool has_nonempty_nulls(column_view const& input) { return detail::has_nonempty_
 std::unique_ptr<cudf::column> purge_nonempty_nulls(lists_column_view const& input,
                                                    rmm::mr::device_memory_resource* mr)
 {
-  return detail::purge_nonempty_nulls(input, cudf::default_stream_value, mr);
+  return detail::purge_nonempty_nulls(input, cudf::get_default_stream(), mr);
 }
 
 /**
@@ -121,7 +121,7 @@ std::unique_ptr<cudf::column> purge_nonempty_nulls(lists_column_view const& inpu
 std::unique_ptr<cudf::column> purge_nonempty_nulls(structs_column_view const& input,
                                                    rmm::mr::device_memory_resource* mr)
 {
-  return detail::purge_nonempty_nulls(input, cudf::default_stream_value, mr);
+  return detail::purge_nonempty_nulls(input, cudf::get_default_stream(), mr);
 }
 
 /**
@@ -130,7 +130,7 @@ std::unique_ptr<cudf::column> purge_nonempty_nulls(structs_column_view const& in
 std::unique_ptr<cudf::column> purge_nonempty_nulls(strings_column_view const& input,
                                                    rmm::mr::device_memory_resource* mr)
 {
-  return detail::purge_nonempty_nulls(input, cudf::default_stream_value, mr);
+  return detail::purge_nonempty_nulls(input, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/copying/reverse.cu b/cpp/src/copying/reverse.cu
index a1ffa115ad1..cf8ca7d9a92 100644
--- a/cpp/src/copying/reverse.cu
+++ b/cpp/src/copying/reverse.cu
@@ -57,13 +57,13 @@ std::unique_ptr<column> reverse(column_view const& source_column,
 std::unique_ptr<table> reverse(table_view const& source_table, rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::reverse(source_table, cudf::default_stream_value, mr);
+  return detail::reverse(source_table, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> reverse(column_view const& source_column,
                                 rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::reverse(source_column, cudf::default_stream_value, mr);
+  return detail::reverse(source_column, cudf::get_default_stream(), mr);
 }
 }  // namespace cudf
diff --git a/cpp/src/copying/sample.cu b/cpp/src/copying/sample.cu
index 9a164bd053a..27a3f145caa 100644
--- a/cpp/src/copying/sample.cu
+++ b/cpp/src/copying/sample.cu
@@ -93,6 +93,6 @@ std::unique_ptr<table> sample(table_view const& input,
                               rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::sample(input, n, replacement, seed, cudf::default_stream_value, mr);
+  return detail::sample(input, n, replacement, seed, cudf::get_default_stream(), mr);
 }
 }  // namespace cudf
diff --git a/cpp/src/copying/scatter.cu b/cpp/src/copying/scatter.cu
index 63711a43c3b..7b6ff80e3e4 100644
--- a/cpp/src/copying/scatter.cu
+++ b/cpp/src/copying/scatter.cu
@@ -490,7 +490,7 @@ std::unique_ptr<table> scatter(table_view const& source,
                                rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::scatter(source, scatter_map, target, cudf::default_stream_value, mr);
+  return detail::scatter(source, scatter_map, target, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<table> scatter(std::vector<std::reference_wrapper<const scalar>> const& source,
@@ -499,7 +499,7 @@ std::unique_ptr<table> scatter(std::vector<std::reference_wrapper<const scalar>>
                                rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::scatter(source, indices, target, cudf::default_stream_value, mr);
+  return detail::scatter(source, indices, target, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<table> boolean_mask_scatter(table_view const& input,
@@ -508,7 +508,7 @@ std::unique_ptr<table> boolean_mask_scatter(table_view const& input,
                                             rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::boolean_mask_scatter(input, target, boolean_mask, cudf::default_stream_value, mr);
+  return detail::boolean_mask_scatter(input, target, boolean_mask, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<table> boolean_mask_scatter(
@@ -518,7 +518,7 @@ std::unique_ptr<table> boolean_mask_scatter(
   rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::boolean_mask_scatter(input, target, boolean_mask, cudf::default_stream_value, mr);
+  return detail::boolean_mask_scatter(input, target, boolean_mask, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/copying/shift.cu b/cpp/src/copying/shift.cu
index 607388cff56..a6126374ed2 100644
--- a/cpp/src/copying/shift.cu
+++ b/cpp/src/copying/shift.cu
@@ -174,7 +174,7 @@ std::unique_ptr<column> shift(column_view const& input,
                               rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::shift(input, offset, fill_value, cudf::default_stream_value, mr);
+  return detail::shift(input, offset, fill_value, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/copying/slice.cu b/cpp/src/copying/slice.cu
index e329756b0df..52410ada128 100644
--- a/cpp/src/copying/slice.cu
+++ b/cpp/src/copying/slice.cu
@@ -114,25 +114,25 @@ std::vector<table_view> slice(table_view const& input,
 std::vector<column_view> slice(column_view const& input, host_span<size_type const> indices)
 {
   CUDF_FUNC_RANGE();
-  return detail::slice(input, indices, cudf::default_stream_value);
+  return detail::slice(input, indices, cudf::get_default_stream());
 }
 
 std::vector<table_view> slice(table_view const& input, host_span<size_type const> indices)
 {
   CUDF_FUNC_RANGE();
-  return detail::slice(input, indices, cudf::default_stream_value);
+  return detail::slice(input, indices, cudf::get_default_stream());
 };
 
 std::vector<column_view> slice(column_view const& input, std::initializer_list<size_type> indices)
 {
   CUDF_FUNC_RANGE();
-  return detail::slice(input, indices, cudf::default_stream_value);
+  return detail::slice(input, indices, cudf::get_default_stream());
 }
 
 std::vector<table_view> slice(table_view const& input, std::initializer_list<size_type> indices)
 {
   CUDF_FUNC_RANGE();
-  return detail::slice(input, indices, cudf::default_stream_value);
+  return detail::slice(input, indices, cudf::get_default_stream());
 };
 
 }  // namespace cudf
diff --git a/cpp/src/copying/split.cpp b/cpp/src/copying/split.cpp
index 19ecd959172..b577886febf 100644
--- a/cpp/src/copying/split.cpp
+++ b/cpp/src/copying/split.cpp
@@ -86,26 +86,26 @@ std::vector<cudf::column_view> split(cudf::column_view const& input,
                                      host_span<size_type const> splits)
 {
   CUDF_FUNC_RANGE();
-  return detail::split(input, splits, cudf::default_stream_value);
+  return detail::split(input, splits, cudf::get_default_stream());
 }
 
 std::vector<cudf::table_view> split(cudf::table_view const& input,
                                     host_span<size_type const> splits)
 {
   CUDF_FUNC_RANGE();
-  return detail::split(input, splits, cudf::default_stream_value);
+  return detail::split(input, splits, cudf::get_default_stream());
 }
 
 std::vector<column_view> split(column_view const& input, std::initializer_list<size_type> splits)
 {
   CUDF_FUNC_RANGE();
-  return detail::split(input, splits, cudf::default_stream_value);
+  return detail::split(input, splits, cudf::get_default_stream());
 }
 
 std::vector<table_view> split(table_view const& input, std::initializer_list<size_type> splits)
 {
   CUDF_FUNC_RANGE();
-  return detail::split(input, splits, cudf::default_stream_value);
+  return detail::split(input, splits, cudf::get_default_stream());
 }
 
 }  // namespace cudf
diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu
index e89792525c9..db1d04259b5 100644
--- a/cpp/src/datetime/datetime_ops.cu
+++ b/cpp/src/datetime/datetime_ops.cu
@@ -583,7 +583,7 @@ std::unique_ptr<column> ceil_datetimes(column_view const& column,
 {
   CUDF_FUNC_RANGE();
   return detail::round_general(
-    detail::rounding_function::CEIL, freq, column, cudf::default_stream_value, mr);
+    detail::rounding_function::CEIL, freq, column, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> floor_datetimes(column_view const& column,
@@ -592,7 +592,7 @@ std::unique_ptr<column> floor_datetimes(column_view const& column,
 {
   CUDF_FUNC_RANGE();
   return detail::round_general(
-    detail::rounding_function::FLOOR, freq, column, cudf::default_stream_value, mr);
+    detail::rounding_function::FLOOR, freq, column, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> round_datetimes(column_view const& column,
@@ -601,87 +601,87 @@ std::unique_ptr<column> round_datetimes(column_view const& column,
 {
   CUDF_FUNC_RANGE();
   return detail::round_general(
-    detail::rounding_function::ROUND, freq, column, cudf::default_stream_value, mr);
+    detail::rounding_function::ROUND, freq, column, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> extract_year(column_view const& column, rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::extract_year(column, cudf::default_stream_value, mr);
+  return detail::extract_year(column, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> extract_month(column_view const& column,
                                       rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::extract_month(column, cudf::default_stream_value, mr);
+  return detail::extract_month(column, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> extract_day(column_view const& column, rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::extract_day(column, cudf::default_stream_value, mr);
+  return detail::extract_day(column, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> extract_weekday(column_view const& column,
                                         rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::extract_weekday(column, cudf::default_stream_value, mr);
+  return detail::extract_weekday(column, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> extract_hour(column_view const& column, rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::extract_hour(column, cudf::default_stream_value, mr);
+  return detail::extract_hour(column, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> extract_minute(column_view const& column,
                                        rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::extract_minute(column, cudf::default_stream_value, mr);
+  return detail::extract_minute(column, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> extract_second(column_view const& column,
                                        rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::extract_second(column, cudf::default_stream_value, mr);
+  return detail::extract_second(column, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> extract_millisecond_fraction(column_view const& column,
                                                      rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::extract_millisecond_fraction(column, cudf::default_stream_value, mr);
+  return detail::extract_millisecond_fraction(column, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> extract_microsecond_fraction(column_view const& column,
                                                      rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::extract_microsecond_fraction(column, cudf::default_stream_value, mr);
+  return detail::extract_microsecond_fraction(column, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> extract_nanosecond_fraction(column_view const& column,
                                                     rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::extract_nanosecond_fraction(column, cudf::default_stream_value, mr);
+  return detail::extract_nanosecond_fraction(column, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> last_day_of_month(column_view const& column,
                                           rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::last_day_of_month(column, cudf::default_stream_value, mr);
+  return detail::last_day_of_month(column, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> day_of_year(column_view const& column, rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::day_of_year(column, cudf::default_stream_value, mr);
+  return detail::day_of_year(column, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<cudf::column> add_calendrical_months(cudf::column_view const& timestamp_column,
@@ -690,7 +690,7 @@ std::unique_ptr<cudf::column> add_calendrical_months(cudf::column_view const& ti
 {
   CUDF_FUNC_RANGE();
   return detail::add_calendrical_months(
-    timestamp_column, months_column, cudf::default_stream_value, mr);
+    timestamp_column, months_column, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<cudf::column> add_calendrical_months(cudf::column_view const& timestamp_column,
@@ -698,27 +698,27 @@ std::unique_ptr<cudf::column> add_calendrical_months(cudf::column_view const& ti
                                                      rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::add_calendrical_months(timestamp_column, months, cudf::default_stream_value, mr);
+  return detail::add_calendrical_months(timestamp_column, months, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> is_leap_year(column_view const& column, rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::is_leap_year(column, cudf::default_stream_value, mr);
+  return detail::is_leap_year(column, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> days_in_month(column_view const& column,
                                       rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::days_in_month(column, cudf::default_stream_value, mr);
+  return detail::days_in_month(column, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> extract_quarter(column_view const& column,
                                         rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::extract_quarter(column, cudf::default_stream_value, mr);
+  return detail::extract_quarter(column, cudf::get_default_stream(), mr);
 }
 
 }  // namespace datetime
diff --git a/cpp/src/dictionary/add_keys.cu b/cpp/src/dictionary/add_keys.cu
index 3dea491b6e4..0c4e20aa97f 100644
--- a/cpp/src/dictionary/add_keys.cu
+++ b/cpp/src/dictionary/add_keys.cu
@@ -132,7 +132,7 @@ std::unique_ptr<column> add_keys(dictionary_column_view const& dictionary_column
                                  rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::add_keys(dictionary_column, keys, cudf::default_stream_value, mr);
+  return detail::add_keys(dictionary_column, keys, cudf::get_default_stream(), mr);
 }
 
 }  // namespace dictionary
diff --git a/cpp/src/dictionary/decode.cu b/cpp/src/dictionary/decode.cu
index 22e2ee578a0..01411d06b62 100644
--- a/cpp/src/dictionary/decode.cu
+++ b/cpp/src/dictionary/decode.cu
@@ -68,7 +68,7 @@ std::unique_ptr<column> decode(dictionary_column_view const& source,
                                rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::decode(source, cudf::default_stream_value, mr);
+  return detail::decode(source, cudf::get_default_stream(), mr);
 }
 
 }  // namespace dictionary
diff --git a/cpp/src/dictionary/encode.cu b/cpp/src/dictionary/encode.cu
index 4e8f992b633..fe8e777b694 100644
--- a/cpp/src/dictionary/encode.cu
+++ b/cpp/src/dictionary/encode.cu
@@ -92,7 +92,7 @@ std::unique_ptr<column> encode(column_view const& input_column,
                                rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::encode(input_column, indices_type, cudf::default_stream_value, mr);
+  return detail::encode(input_column, indices_type, cudf::get_default_stream(), mr);
 }
 
 }  // namespace dictionary
diff --git a/cpp/src/dictionary/remove_keys.cu b/cpp/src/dictionary/remove_keys.cu
index 4506ea98ca4..4f17fac3129 100644
--- a/cpp/src/dictionary/remove_keys.cu
+++ b/cpp/src/dictionary/remove_keys.cu
@@ -59,7 +59,7 @@ template <typename KeysKeeper>
 std::unique_ptr<column> remove_keys_fn(
   dictionary_column_view const& dictionary_column,
   KeysKeeper keys_to_keep_fn,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   auto const keys_view    = dictionary_column.keys();
@@ -151,7 +151,7 @@ std::unique_ptr<column> remove_keys_fn(
 std::unique_ptr<column> remove_keys(
   dictionary_column_view const& dictionary_column,
   column_view const& keys_to_remove,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   CUDF_EXPECTS(!keys_to_remove.has_nulls(), "keys_to_remove must not have nulls");
@@ -168,7 +168,7 @@ std::unique_ptr<column> remove_keys(
 
 std::unique_ptr<column> remove_unused_keys(
   dictionary_column_view const& dictionary_column,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   // locate the keys to remove
@@ -200,14 +200,14 @@ std::unique_ptr<column> remove_keys(dictionary_column_view const& dictionary_col
                                     rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::remove_keys(dictionary_column, keys_to_remove, cudf::default_stream_value, mr);
+  return detail::remove_keys(dictionary_column, keys_to_remove, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> remove_unused_keys(dictionary_column_view const& dictionary_column,
                                            rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::remove_unused_keys(dictionary_column, cudf::default_stream_value, mr);
+  return detail::remove_unused_keys(dictionary_column, cudf::get_default_stream(), mr);
 }
 
 }  // namespace dictionary
diff --git a/cpp/src/dictionary/search.cu b/cpp/src/dictionary/search.cu
index 3936f7470e5..8e97a387780 100644
--- a/cpp/src/dictionary/search.cu
+++ b/cpp/src/dictionary/search.cu
@@ -79,7 +79,7 @@ struct find_index_fn {
     using ScalarType = cudf::scalar_type_t<Element>;
     auto find_key    = static_cast<ScalarType const&>(key).value(stream);
     auto keys_view   = column_device_view::create(input.keys(), stream);
-    auto iter        = thrust::equal_range(rmm::exec_policy(cudf::default_stream_value),
+    auto iter        = thrust::equal_range(rmm::exec_policy(cudf::get_default_stream()),
                                     keys_view->begin<Element>(),
                                     keys_view->end<Element>(),
                                     find_key);
@@ -179,7 +179,7 @@ std::unique_ptr<scalar> get_index(dictionary_column_view const& dictionary,
                                   rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::get_index(dictionary, key, cudf::default_stream_value, mr);
+  return detail::get_index(dictionary, key, cudf::get_default_stream(), mr);
 }
 
 }  // namespace dictionary
diff --git a/cpp/src/dictionary/set_keys.cu b/cpp/src/dictionary/set_keys.cu
index 216f00c90e1..db0c4937582 100644
--- a/cpp/src/dictionary/set_keys.cu
+++ b/cpp/src/dictionary/set_keys.cu
@@ -245,14 +245,14 @@ std::unique_ptr<column> set_keys(dictionary_column_view const& dictionary_column
                                  rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::set_keys(dictionary_column, keys, cudf::default_stream_value, mr);
+  return detail::set_keys(dictionary_column, keys, cudf::get_default_stream(), mr);
 }
 
 std::vector<std::unique_ptr<column>> match_dictionaries(
   cudf::host_span<dictionary_column_view const> input, rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::match_dictionaries(input, cudf::default_stream_value, mr);
+  return detail::match_dictionaries(input, cudf::get_default_stream(), mr);
 }
 
 }  // namespace dictionary
diff --git a/cpp/src/filling/calendrical_month_sequence.cu b/cpp/src/filling/calendrical_month_sequence.cu
index d4b3e209c4a..f45634a615e 100644
--- a/cpp/src/filling/calendrical_month_sequence.cu
+++ b/cpp/src/filling/calendrical_month_sequence.cu
@@ -43,7 +43,7 @@ std::unique_ptr<cudf::column> calendrical_month_sequence(size_type size,
                                                          rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::calendrical_month_sequence(size, init, months, cudf::default_stream_value, mr);
+  return detail::calendrical_month_sequence(size, init, months, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/filling/fill.cu b/cpp/src/filling/fill.cu
index 2abb0cf9795..290fff33cf6 100644
--- a/cpp/src/filling/fill.cu
+++ b/cpp/src/filling/fill.cu
@@ -248,7 +248,7 @@ void fill_in_place(mutable_column_view& destination,
                    scalar const& value)
 {
   CUDF_FUNC_RANGE();
-  return detail::fill_in_place(destination, begin, end, value, cudf::default_stream_value);
+  return detail::fill_in_place(destination, begin, end, value, cudf::get_default_stream());
 }
 
 std::unique_ptr<column> fill(column_view const& input,
@@ -258,7 +258,7 @@ std::unique_ptr<column> fill(column_view const& input,
                              rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::fill(input, begin, end, value, cudf::default_stream_value, mr);
+  return detail::fill(input, begin, end, value, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/filling/repeat.cu b/cpp/src/filling/repeat.cu
index b2587e67350..8d86a9d9827 100644
--- a/cpp/src/filling/repeat.cu
+++ b/cpp/src/filling/repeat.cu
@@ -157,7 +157,7 @@ std::unique_ptr<table> repeat(table_view const& input_table,
                               rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::repeat(input_table, count, cudf::default_stream_value, mr);
+  return detail::repeat(input_table, count, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<table> repeat(table_view const& input_table,
@@ -165,7 +165,7 @@ std::unique_ptr<table> repeat(table_view const& input_table,
                               rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::repeat(input_table, count, cudf::default_stream_value, mr);
+  return detail::repeat(input_table, count, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/filling/sequence.cu b/cpp/src/filling/sequence.cu
index a2ae3b9e70c..284e7c46347 100644
--- a/cpp/src/filling/sequence.cu
+++ b/cpp/src/filling/sequence.cu
@@ -154,7 +154,7 @@ std::unique_ptr<column> sequence(size_type size,
                                  rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::sequence(size, init, step, cudf::default_stream_value, mr);
+  return detail::sequence(size, init, step, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> sequence(size_type size,
@@ -162,7 +162,7 @@ std::unique_ptr<column> sequence(size_type size,
                                  rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::sequence(size, init, cudf::default_stream_value, mr);
+  return detail::sequence(size, init, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu
index cd54e921a4c..dde0037a8c3 100644
--- a/cpp/src/groupby/groupby.cu
+++ b/cpp/src/groupby/groupby.cu
@@ -196,7 +196,7 @@ std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> groupby::aggr
 
   if (_keys.num_rows() == 0) { return std::pair(empty_like(_keys), empty_results(requests)); }
 
-  return dispatch_aggregation(requests, cudf::default_stream_value, mr);
+  return dispatch_aggregation(requests, cudf::get_default_stream(), mr);
 }
 
 // Compute scan requests
@@ -214,13 +214,13 @@ std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> groupby::scan
 
   if (_keys.num_rows() == 0) { return std::pair(empty_like(_keys), empty_results(requests)); }
 
-  return sort_scan(requests, cudf::default_stream_value, mr);
+  return sort_scan(requests, cudf::get_default_stream(), mr);
 }
 
 groupby::groups groupby::get_groups(table_view values, rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  auto const stream = cudf::default_stream_value;
+  auto const stream = cudf::get_default_stream();
   auto grouped_keys = helper().sorted_keys(stream, mr);
 
   auto const& group_offsets       = helper().group_offsets(stream);
@@ -252,7 +252,7 @@ std::pair<std::unique_ptr<table>, std::unique_ptr<table>> groupby::replace_nulls
                "Size mismatch between num_columns and replace_policies.");
 
   if (values.is_empty()) { return std::pair(empty_like(_keys), empty_like(values)); }
-  auto const stream = cudf::default_stream_value;
+  auto const stream = cudf::get_default_stream();
 
   auto const& group_labels = helper().group_labels(stream);
   std::vector<std::unique_ptr<column>> results;
@@ -298,7 +298,7 @@ std::pair<std::unique_ptr<table>, std::unique_ptr<table>> groupby::shift(
                 [&](auto i) { return values.column(i).type() == fill_values[i].get().type(); }),
     "values and fill_value should have the same type.");
 
-  auto stream = cudf::default_stream_value;
+  auto stream = cudf::get_default_stream();
   std::vector<std::unique_ptr<column>> results;
   auto const& group_offsets = helper().group_offsets(stream);
   std::transform(
diff --git a/cpp/src/hash/concurrent_unordered_map.cuh b/cpp/src/hash/concurrent_unordered_map.cuh
index c2081c596a1..a268e2ef778 100644
--- a/cpp/src/hash/concurrent_unordered_map.cuh
+++ b/cpp/src/hash/concurrent_unordered_map.cuh
@@ -159,7 +159,7 @@ class concurrent_unordered_map {
    * storage
    */
   static auto create(size_type capacity,
-                     rmm::cuda_stream_view stream     = cudf::default_stream_value,
+                     rmm::cuda_stream_view stream     = cudf::get_default_stream(),
                      const mapped_type unused_element = std::numeric_limits<mapped_type>::max(),
                      const key_type unused_key        = std::numeric_limits<key_type>::max(),
                      const Hasher& hash_function      = hasher(),
@@ -422,7 +422,7 @@ class concurrent_unordered_map {
   }
 
   void assign_async(const concurrent_unordered_map& other,
-                    rmm::cuda_stream_view stream = cudf::default_stream_value)
+                    rmm::cuda_stream_view stream = cudf::get_default_stream())
   {
     if (other.m_capacity <= m_capacity) {
       m_capacity = other.m_capacity;
@@ -440,7 +440,7 @@ class concurrent_unordered_map {
                                   stream.value()));
   }
 
-  void clear_async(rmm::cuda_stream_view stream = cudf::default_stream_value)
+  void clear_async(rmm::cuda_stream_view stream = cudf::get_default_stream())
   {
     constexpr int block_size = 128;
     init_hashtbl<<<((m_capacity - 1) / block_size) + 1, block_size, 0, stream.value()>>>(
@@ -455,7 +455,7 @@ class concurrent_unordered_map {
     }
   }
 
-  void prefetch(const int dev_id, rmm::cuda_stream_view stream = cudf::default_stream_value)
+  void prefetch(const int dev_id, rmm::cuda_stream_view stream = cudf::get_default_stream())
   {
     cudaPointerAttributes hashtbl_values_ptr_attributes;
     cudaError_t status = cudaPointerGetAttributes(&hashtbl_values_ptr_attributes, m_hashtbl_values);
@@ -475,7 +475,7 @@ class concurrent_unordered_map {
    *
    * @param stream CUDA stream used for device memory operations and kernel launches.
    */
-  void destroy(rmm::cuda_stream_view stream = cudf::default_stream_value)
+  void destroy(rmm::cuda_stream_view stream = cudf::get_default_stream())
   {
     m_allocator.deallocate(m_hashtbl_values, m_capacity, stream);
     delete this;
@@ -516,7 +516,7 @@ class concurrent_unordered_map {
                            const Hasher& hash_function,
                            const Equality& equal,
                            const allocator_type& allocator,
-                           rmm::cuda_stream_view stream = cudf::default_stream_value)
+                           rmm::cuda_stream_view stream = cudf::get_default_stream())
     : m_hf(hash_function),
       m_equal(equal),
       m_allocator(allocator),
diff --git a/cpp/src/hash/hash_allocator.cuh b/cpp/src/hash/hash_allocator.cuh
index 2da0a4fb4bd..b3d2556d392 100644
--- a/cpp/src/hash/hash_allocator.cuh
+++ b/cpp/src/hash/hash_allocator.cuh
@@ -38,14 +38,14 @@ struct managed_allocator {
   {
   }
 
-  T* allocate(std::size_t n, rmm::cuda_stream_view stream = cudf::default_stream_value) const
+  T* allocate(std::size_t n, rmm::cuda_stream_view stream = cudf::get_default_stream()) const
   {
     return static_cast<T*>(mr->allocate(n * sizeof(T), stream));
   }
 
   void deallocate(T* p,
                   std::size_t n,
-                  rmm::cuda_stream_view stream = cudf::default_stream_value) const
+                  rmm::cuda_stream_view stream = cudf::get_default_stream()) const
   {
     mr->deallocate(p, n * sizeof(T), stream);
   }
@@ -74,14 +74,14 @@ struct default_allocator {
   {
   }
 
-  T* allocate(std::size_t n, rmm::cuda_stream_view stream = cudf::default_stream_value) const
+  T* allocate(std::size_t n, rmm::cuda_stream_view stream = cudf::get_default_stream()) const
   {
     return static_cast<T*>(mr->allocate(n * sizeof(T), stream));
   }
 
   void deallocate(T* p,
                   std::size_t n,
-                  rmm::cuda_stream_view stream = cudf::default_stream_value) const
+                  rmm::cuda_stream_view stream = cudf::get_default_stream()) const
   {
     mr->deallocate(p, n * sizeof(T), stream);
   }
diff --git a/cpp/src/hash/hashing.cu b/cpp/src/hash/hashing.cu
index e5fac1e7c2c..150017d9117 100644
--- a/cpp/src/hash/hashing.cu
+++ b/cpp/src/hash/hashing.cu
@@ -74,7 +74,7 @@ std::unique_ptr<column> hash(table_view const& input,
                              rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::hash(input, hash_function, seed, cudf::default_stream_value, mr);
+  return detail::hash(input, hash_function, seed, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/interop/dlpack.cpp b/cpp/src/interop/dlpack.cpp
index 7b300924dd5..58afc8e9015 100644
--- a/cpp/src/interop/dlpack.cpp
+++ b/cpp/src/interop/dlpack.cpp
@@ -299,13 +299,13 @@ std::unique_ptr<table> from_dlpack(DLManagedTensor const* managed_tensor,
                                    rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::from_dlpack(managed_tensor, cudf::default_stream_value, mr);
+  return detail::from_dlpack(managed_tensor, cudf::get_default_stream(), mr);
 }
 
 DLManagedTensor* to_dlpack(table_view const& input, rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::to_dlpack(input, cudf::default_stream_value, mr);
+  return detail::to_dlpack(input, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/interop/from_arrow.cu b/cpp/src/interop/from_arrow.cu
index 86ea6f4427e..2d4501ec9f7 100644
--- a/cpp/src/interop/from_arrow.cu
+++ b/cpp/src/interop/from_arrow.cu
@@ -450,7 +450,7 @@ std::unique_ptr<table> from_arrow(arrow::Table const& input_table,
 {
   CUDF_FUNC_RANGE();
 
-  return detail::from_arrow(input_table, cudf::default_stream_value, mr);
+  return detail::from_arrow(input_table, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu
index eeb27c2ac05..fb203e6c3c1 100644
--- a/cpp/src/interop/to_arrow.cu
+++ b/cpp/src/interop/to_arrow.cu
@@ -154,7 +154,7 @@ std::shared_ptr<arrow::Array> dispatch_to_arrow::operator()<numeric::decimal64>(
 
   auto count = thrust::make_counting_iterator(0);
 
-  thrust::for_each(rmm::exec_policy(cudf::default_stream_value),
+  thrust::for_each(rmm::exec_policy(cudf::get_default_stream()),
                    count,
                    count + input.size(),
                    [in = input.begin<DeviceType>(), out = buf.data()] __device__(auto in_idx) {
@@ -416,7 +416,7 @@ std::shared_ptr<arrow::Table> to_arrow(table_view input,
                                        arrow::MemoryPool* ar_mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::to_arrow(input, metadata, cudf::default_stream_value, ar_mr);
+  return detail::to_arrow(input, metadata, cudf::get_default_stream(), ar_mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/io/fst/logical_stack.cuh b/cpp/src/io/fst/logical_stack.cuh
index 9502922a379..b23a3d756df 100644
--- a/cpp/src/io/fst/logical_stack.cuh
+++ b/cpp/src/io/fst/logical_stack.cuh
@@ -267,7 +267,7 @@ void sparse_stack_op_to_top_of_stack(StackSymbolItT d_symbols,
                                      StackSymbolT const empty_stack_symbol,
                                      StackSymbolT const read_symbol,
                                      std::size_t const num_symbols_out,
-                                     rmm::cuda_stream_view stream = cudf::default_stream_value)
+                                     rmm::cuda_stream_view stream = cudf::get_default_stream())
 {
   rmm::device_buffer temp_storage{};
 
diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp
index aabaa941daf..968d3827bfe 100644
--- a/cpp/src/io/functions.cpp
+++ b/cpp/src/io/functions.cpp
@@ -156,7 +156,7 @@ table_with_metadata read_avro(avro_reader_options const& options,
 
   CUDF_EXPECTS(datasources.size() == 1, "Only a single source is currently supported.");
 
-  return avro::read_avro(std::move(datasources[0]), options, cudf::default_stream_value, mr);
+  return avro::read_avro(std::move(datasources[0]), options, cudf::get_default_stream(), mr);
 }
 
 compression_type infer_compression_type(compression_type compression, source_info const& info)
@@ -198,7 +198,7 @@ table_with_metadata read_json(json_reader_options options, rmm::mr::device_memor
                                       options.get_byte_range_offset(),
                                       options.get_byte_range_size_with_padding());
 
-  return detail::json::read_json(datasources, options, cudf::default_stream_value, mr);
+  return detail::json::read_json(datasources, options, cudf::get_default_stream(), mr);
 }
 
 table_with_metadata read_csv(csv_reader_options options, rmm::mr::device_memory_resource* mr)
@@ -216,7 +216,7 @@ table_with_metadata read_csv(csv_reader_options options, rmm::mr::device_memory_
   return cudf::io::detail::csv::read_csv(  //
     std::move(datasources[0]),
     options,
-    cudf::default_stream_value,
+    cudf::get_default_stream(),
     mr);
 }
 
@@ -233,7 +233,7 @@ void write_csv(csv_writer_options const& options, rmm::mr::device_memory_resourc
     options.get_table(),
     options.get_metadata(),
     options,
-    cudf::default_stream_value,
+    cudf::get_default_stream(),
     mr);
 }
 
@@ -241,7 +241,7 @@ namespace detail_orc = cudf::io::detail::orc;
 
 raw_orc_statistics read_raw_orc_statistics(source_info const& src_info)
 {
-  auto stream = cudf::default_stream_value;
+  auto stream = cudf::get_default_stream();
   // Get source to read statistics from
   std::unique_ptr<datasource> source;
   if (src_info.type() == io_type::FILEPATH) {
@@ -347,7 +347,7 @@ table_with_metadata read_orc(orc_reader_options const& options, rmm::mr::device_
 
   auto datasources = make_datasources(options.get_source());
   auto reader      = std::make_unique<detail_orc::reader>(
-    std::move(datasources), options, cudf::default_stream_value, mr);
+    std::move(datasources), options, cudf::get_default_stream(), mr);
 
   return reader->read(options);
 }
@@ -365,7 +365,7 @@ void write_orc(orc_writer_options const& options, rmm::mr::device_memory_resourc
   CUDF_EXPECTS(sinks.size() == 1, "Multiple sinks not supported for ORC writing");
 
   auto writer = std::make_unique<detail_orc::writer>(
-    std::move(sinks[0]), options, io_detail::SingleWriteMode::YES, cudf::default_stream_value, mr);
+    std::move(sinks[0]), options, io_detail::SingleWriteMode::YES, cudf::get_default_stream(), mr);
 
   writer->write(options.get_table());
 }
@@ -382,7 +382,7 @@ orc_chunked_writer::orc_chunked_writer(chunked_orc_writer_options const& options
   CUDF_EXPECTS(sinks.size() == 1, "Multiple sinks not supported for ORC writing");
 
   writer = std::make_unique<detail_orc::writer>(
-    std::move(sinks[0]), options, io_detail::SingleWriteMode::NO, cudf::default_stream_value, mr);
+    std::move(sinks[0]), options, io_detail::SingleWriteMode::NO, cudf::get_default_stream(), mr);
 }
 
 /**
@@ -417,7 +417,7 @@ table_with_metadata read_parquet(parquet_reader_options const& options,
 
   auto datasources = make_datasources(options.get_source());
   auto reader      = std::make_unique<detail_parquet::reader>(
-    std::move(datasources), options, cudf::default_stream_value, mr);
+    std::move(datasources), options, cudf::get_default_stream(), mr);
 
   return reader->read(options);
 }
@@ -458,7 +458,7 @@ std::unique_ptr<std::vector<uint8_t>> write_parquet(parquet_writer_options const
 
   auto sinks  = make_datasinks(options.get_sink());
   auto writer = std::make_unique<detail_parquet::writer>(
-    std::move(sinks), options, io_detail::SingleWriteMode::YES, cudf::default_stream_value, mr);
+    std::move(sinks), options, io_detail::SingleWriteMode::YES, cudf::get_default_stream(), mr);
 
   writer->write(options.get_table(), options.get_partitions());
 
@@ -476,7 +476,7 @@ parquet_chunked_writer::parquet_chunked_writer(chunked_parquet_writer_options co
   auto sinks = make_datasinks(options.get_sink());
 
   writer = std::make_unique<detail_parquet::writer>(
-    std::move(sinks), options, io_detail::SingleWriteMode::NO, cudf::default_stream_value, mr);
+    std::move(sinks), options, io_detail::SingleWriteMode::NO, cudf::get_default_stream(), mr);
 }
 
 /**
diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu
index 872e742a5af..36329db3e88 100644
--- a/cpp/src/io/json/json_column.cu
+++ b/cpp/src/io/json/json_column.cu
@@ -73,7 +73,7 @@ auto print_vec = [](auto const& cpu, auto const name, auto converter) {
 
 void print_tree(host_span<SymbolT const> input,
                 tree_meta_t const& d_gpu_tree,
-                rmm::cuda_stream_view stream = cudf::default_stream_value)
+                rmm::cuda_stream_view stream = cudf::get_default_stream())
 {
   print_vec(cudf::detail::make_std_vector_async(d_gpu_tree.node_categories, stream),
             "node_categories",
@@ -278,11 +278,11 @@ std::vector<std::string> copy_strings_to_host(device_span<SymbolT const> input,
     auto const scv     = cudf::strings_column_view(col);
     auto const h_chars = cudf::detail::make_std_vector_sync<char>(
       cudf::device_span<char const>(scv.chars().data<char>(), scv.chars().size()),
-      cudf::default_stream_value);
+      cudf::get_default_stream());
     auto const h_offsets = cudf::detail::make_std_vector_sync(
       cudf::device_span<cudf::offset_type const>(
         scv.offsets().data<cudf::offset_type>() + scv.offset(), scv.size() + 1),
-      cudf::default_stream_value);
+      cudf::get_default_stream());
 
     // build std::string vector from chars and offsets
     std::vector<std::string> host_data;
diff --git a/cpp/src/io/json/json_tree.cu b/cpp/src/io/json/json_tree.cu
index cf041b02a20..8be298c6a8b 100644
--- a/cpp/src/io/json/json_tree.cu
+++ b/cpp/src/io/json/json_tree.cu
@@ -162,8 +162,14 @@ std::pair<rmm::device_uvector<KeyType>, rmm::device_uvector<IndexType>> stable_s
   thrust::copy(rmm::exec_policy(stream), keys.begin(), keys.end(), keys_buffer1.begin());
   thrust::sequence(rmm::exec_policy(stream), order_buffer1.begin(), order_buffer1.end());
 
-  cub::DeviceRadixSort::SortPairs(
-    d_temp_storage.data(), temp_storage_bytes, keys_buffer, order_buffer, keys.size());
+  cub::DeviceRadixSort::SortPairs(d_temp_storage.data(),
+                                  temp_storage_bytes,
+                                  keys_buffer,
+                                  order_buffer,
+                                  keys.size(),
+                                  0,
+                                  sizeof(KeyType) * 8,
+                                  stream.value());
 
   return std::pair{keys_buffer.Current() == keys_buffer1.data() ? std::move(keys_buffer1)
                                                                 : std::move(keys_buffer2),
diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu
index 29a29a1f9d5..0c35930c2e4 100644
--- a/cpp/src/io/json/nested_json_gpu.cu
+++ b/cpp/src/io/json/nested_json_gpu.cu
@@ -1540,7 +1540,7 @@ auto parsing_options(cudf::io::json_reader_options const& options)
 {
   auto parse_opts = cudf::io::parse_options{',', '\n', '\"', '.'};
 
-  auto const stream     = cudf::default_stream_value;
+  auto const stream     = cudf::get_default_stream();
   parse_opts.dayfirst   = options.is_enabled_dayfirst();
   parse_opts.keepquotes = options.is_enabled_keep_quotes();
   parse_opts.trie_true  = cudf::detail::create_serialized_trie({"true"}, stream);
diff --git a/cpp/src/io/orc/timezone.cuh b/cpp/src/io/orc/timezone.cuh
index 2eb20af7898..9b98aa13bac 100644
--- a/cpp/src/io/orc/timezone.cuh
+++ b/cpp/src/io/orc/timezone.cuh
@@ -115,7 +115,7 @@ class timezone_table {
 
  public:
   // Safe to use the default stream, device_uvectors will not change after they are created empty
-  timezone_table() : ttimes{0, cudf::default_stream_value}, offsets{0, cudf::default_stream_value}
+  timezone_table() : ttimes{0, cudf::get_default_stream()}, offsets{0, cudf::get_default_stream()}
   {
   }
   timezone_table(int32_t gmt_offset,
diff --git a/cpp/src/io/text/bgzip_data_chunk_source.cu b/cpp/src/io/text/bgzip_data_chunk_source.cu
index e4b6bad614d..7f1f6688bec 100644
--- a/cpp/src/io/text/bgzip_data_chunk_source.cu
+++ b/cpp/src/io/text/bgzip_data_chunk_source.cu
@@ -170,7 +170,7 @@ class bgzip_data_chunk_reader : public data_chunk_reader {
       h_compressed_offsets.resize(1);
       h_decompressed_offsets.resize(1);
       // shrinking doesn't allocate/free, so we don't need to worry about streams
-      auto stream = cudf::default_stream_value;
+      auto stream = cudf::get_default_stream();
       d_compressed_blocks.resize(0, stream);
       d_decompressed_blocks.resize(0, stream);
       d_compressed_offsets.resize(0, stream);
@@ -256,8 +256,8 @@ class bgzip_data_chunk_reader : public data_chunk_reader {
                           uint64_t virtual_begin,
                           uint64_t virtual_end)
     : _data_stream(std::move(input_stream)),
-      _prev_blocks{cudf::default_stream_value},  // here we can use the default stream because
-      _curr_blocks{cudf::default_stream_value},  // we only initialize empty device_uvectors
+      _prev_blocks{cudf::get_default_stream()},  // here we can use the default stream because
+      _curr_blocks{cudf::get_default_stream()},  // we only initialize empty device_uvectors
       _local_end{virtual_end & 0xFFFFu},
       _compressed_pos{virtual_begin >> 16},
       _compressed_end{virtual_end >> 16}
diff --git a/cpp/src/io/text/multibyte_split.cu b/cpp/src/io/text/multibyte_split.cu
index 136eb8d24c6..29cec0e8c3f 100644
--- a/cpp/src/io/text/multibyte_split.cu
+++ b/cpp/src/io/text/multibyte_split.cu
@@ -452,19 +452,19 @@ class output_builder {
    * @param actual_size The number of elements that were written to the result of the previous
    *                    `next_output` call.
    */
-  void advance_output(size_type actual_size)
+  void advance_output(size_type actual_size, rmm::cuda_stream_view stream)
   {
     CUDF_EXPECTS(actual_size <= _max_write_size, "Internal error");
     if (_chunks.size() < 2) {
       auto const new_size = _chunks.back().size() + actual_size;
-      inplace_resize(_chunks.back(), new_size);
+      inplace_resize(_chunks.back(), new_size, stream);
     } else {
       auto& tail              = _chunks.back();
       auto& prev              = _chunks.rbegin()[1];
       auto const prev_advance = std::min(actual_size, prev.capacity() - prev.size());
       auto const tail_advance = actual_size - prev_advance;
-      inplace_resize(prev, prev.size() + prev_advance);
-      inplace_resize(tail, tail.size() + tail_advance);
+      inplace_resize(prev, prev.size() + prev_advance, stream);
+      inplace_resize(tail, tail.size() + tail_advance, stream);
     }
     _size += actual_size;
   }
@@ -522,10 +522,12 @@ class output_builder {
    * @param vector The vector
    * @param new_size The new size. Must be smaller than the vector's capacity
    */
-  static void inplace_resize(rmm::device_uvector<T>& vector, size_type new_size)
+  static void inplace_resize(rmm::device_uvector<T>& vector,
+                             size_type new_size,
+                             rmm::cuda_stream_view stream)
   {
     CUDF_EXPECTS(new_size <= vector.capacity(), "Internal error");
-    vector.resize(new_size, rmm::cuda_stream_view{});
+    vector.resize(new_size, stream);
   }
 
   /**
@@ -712,7 +714,7 @@ std::unique_ptr<cudf::column> multibyte_split(cudf::io::text::data_chunk_source
       found_last_offset = true;
       return end_loc + 1;
     }();
-    row_offset_storage.advance_output(new_offsets);
+    row_offset_storage.advance_output(new_offsets, scan_stream);
     // determine if we found the first or last field offset for the byte range
     if (new_offsets > 0 and not first_row_offset) {
       first_row_offset = row_offset_storage.front_element(scan_stream);
@@ -729,7 +731,7 @@ std::unique_ptr<cudf::column> multibyte_split(cudf::io::text::data_chunk_source
       auto const split = begin + std::min<byte_offset>(output_size, char_output.head().size());
       thrust::copy(rmm::exec_policy_nosync(scan_stream), begin, split, char_output.head().begin());
       thrust::copy(rmm::exec_policy_nosync(scan_stream), split, end, char_output.tail().begin());
-      char_storage.advance_output(output_size);
+      char_storage.advance_output(output_size, scan_stream);
     }
 
     cudaEventRecord(last_launch_event, scan_stream.value());
@@ -782,7 +784,7 @@ std::unique_ptr<cudf::column> multibyte_split(cudf::io::text::data_chunk_source
                                               std::optional<byte_range_info> byte_range,
                                               rmm::mr::device_memory_resource* mr)
 {
-  auto stream      = cudf::default_stream_value;
+  auto stream      = cudf::get_default_stream();
   auto stream_pool = rmm::cuda_stream_pool(2);
 
   auto result = detail::multibyte_split(
diff --git a/cpp/src/io/utilities/hostdevice_vector.hpp b/cpp/src/io/utilities/hostdevice_vector.hpp
index b5e59871119..6e34d862ed4 100644
--- a/cpp/src/io/utilities/hostdevice_vector.hpp
+++ b/cpp/src/io/utilities/hostdevice_vector.hpp
@@ -40,7 +40,7 @@ class hostdevice_vector {
  public:
   using value_type = T;
 
-  hostdevice_vector() : hostdevice_vector(0, cudf::default_stream_value) {}
+  hostdevice_vector() : hostdevice_vector(0, cudf::get_default_stream()) {}
 
   explicit hostdevice_vector(size_t size, rmm::cuda_stream_view stream)
     : hostdevice_vector(size, size, stream)
diff --git a/cpp/src/join/conditional_join.cu b/cpp/src/join/conditional_join.cu
index f0b66559799..cf1476d8bcc 100644
--- a/cpp/src/join/conditional_join.cu
+++ b/cpp/src/join/conditional_join.cu
@@ -298,7 +298,7 @@ conditional_inner_join(table_view const& left,
                                   binary_predicate,
                                   detail::join_kind::INNER_JOIN,
                                   output_size,
-                                  cudf::default_stream_value,
+                                  cudf::get_default_stream(),
                                   mr);
 }
 
@@ -316,7 +316,7 @@ conditional_left_join(table_view const& left,
                                   binary_predicate,
                                   detail::join_kind::LEFT_JOIN,
                                   output_size,
-                                  cudf::default_stream_value,
+                                  cudf::get_default_stream(),
                                   mr);
 }
 
@@ -333,7 +333,7 @@ conditional_full_join(table_view const& left,
                                   binary_predicate,
                                   detail::join_kind::FULL_JOIN,
                                   {},
-                                  cudf::default_stream_value,
+                                  cudf::get_default_stream(),
                                   mr);
 }
 
@@ -350,7 +350,7 @@ std::unique_ptr<rmm::device_uvector<size_type>> conditional_left_semi_join(
                                             binary_predicate,
                                             detail::join_kind::LEFT_SEMI_JOIN,
                                             output_size,
-                                            cudf::default_stream_value,
+                                            cudf::get_default_stream(),
                                             mr)
                      .first);
 }
@@ -368,7 +368,7 @@ std::unique_ptr<rmm::device_uvector<size_type>> conditional_left_anti_join(
                                             binary_predicate,
                                             detail::join_kind::LEFT_ANTI_JOIN,
                                             output_size,
-                                            cudf::default_stream_value,
+                                            cudf::get_default_stream(),
                                             mr)
                      .first);
 }
@@ -380,7 +380,7 @@ std::size_t conditional_inner_join_size(table_view const& left,
 {
   CUDF_FUNC_RANGE();
   return detail::compute_conditional_join_output_size(
-    left, right, binary_predicate, detail::join_kind::INNER_JOIN, cudf::default_stream_value, mr);
+    left, right, binary_predicate, detail::join_kind::INNER_JOIN, cudf::get_default_stream(), mr);
 }
 
 std::size_t conditional_left_join_size(table_view const& left,
@@ -390,7 +390,7 @@ std::size_t conditional_left_join_size(table_view const& left,
 {
   CUDF_FUNC_RANGE();
   return detail::compute_conditional_join_output_size(
-    left, right, binary_predicate, detail::join_kind::LEFT_JOIN, cudf::default_stream_value, mr);
+    left, right, binary_predicate, detail::join_kind::LEFT_JOIN, cudf::get_default_stream(), mr);
 }
 
 std::size_t conditional_left_semi_join_size(table_view const& left,
@@ -403,7 +403,7 @@ std::size_t conditional_left_semi_join_size(table_view const& left,
                                                                 right,
                                                                 binary_predicate,
                                                                 detail::join_kind::LEFT_SEMI_JOIN,
-                                                                cudf::default_stream_value,
+                                                                cudf::get_default_stream(),
                                                                 mr));
 }
 
@@ -417,7 +417,7 @@ std::size_t conditional_left_anti_join_size(table_view const& left,
                                                                 right,
                                                                 binary_predicate,
                                                                 detail::join_kind::LEFT_ANTI_JOIN,
-                                                                cudf::default_stream_value,
+                                                                cudf::get_default_stream(),
                                                                 mr));
 }
 
diff --git a/cpp/src/join/conditional_join.hpp b/cpp/src/join/conditional_join.hpp
index 6de2664b5f6..23ecfebc52a 100644
--- a/cpp/src/join/conditional_join.hpp
+++ b/cpp/src/join/conditional_join.hpp
@@ -48,7 +48,7 @@ conditional_join(table_view const& left,
                  ast::expression const& binary_predicate,
                  join_kind JoinKind,
                  std::optional<std::size_t> output_size = {},
-                 rmm::cuda_stream_view stream           = cudf::default_stream_value,
+                 rmm::cuda_stream_view stream           = cudf::get_default_stream(),
                  rmm::mr::device_memory_resource* mr    = rmm::mr::get_current_device_resource());
 
 /**
@@ -68,7 +68,7 @@ std::size_t compute_conditional_join_output_size(
   table_view const& right,
   ast::expression const& binary_predicate,
   join_kind JoinKind,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/src/join/cross_join.cu b/cpp/src/join/cross_join.cu
index 3eb9f1b1198..7358726d69d 100644
--- a/cpp/src/join/cross_join.cu
+++ b/cpp/src/join/cross_join.cu
@@ -78,7 +78,7 @@ std::unique_ptr<cudf::table> cross_join(cudf::table_view const& left,
                                         rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::cross_join(left, right, cudf::default_stream_value, mr);
+  return detail::cross_join(left, right, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/join/join.cu b/cpp/src/join/join.cu
index bb8fc07c2d7..dbc543f4dcd 100644
--- a/cpp/src/join/join.cu
+++ b/cpp/src/join/join.cu
@@ -113,7 +113,7 @@ inner_join(table_view const& left,
            rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::inner_join(left, right, compare_nulls, cudf::default_stream_value, mr);
+  return detail::inner_join(left, right, compare_nulls, cudf::get_default_stream(), mr);
 }
 
 std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
@@ -124,7 +124,7 @@ left_join(table_view const& left,
           rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::left_join(left, right, compare_nulls, cudf::default_stream_value, mr);
+  return detail::left_join(left, right, compare_nulls, cudf::get_default_stream(), mr);
 }
 
 std::pair<std::unique_ptr<rmm::device_uvector<size_type>>,
@@ -135,7 +135,7 @@ full_join(table_view const& left,
           rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::full_join(left, right, compare_nulls, cudf::default_stream_value, mr);
+  return detail::full_join(left, right, compare_nulls, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/join/mixed_join.cu b/cpp/src/join/mixed_join.cu
index ec2dacaca5b..4cedfca218a 100644
--- a/cpp/src/join/mixed_join.cu
+++ b/cpp/src/join/mixed_join.cu
@@ -458,7 +458,7 @@ mixed_inner_join(
                             compare_nulls,
                             detail::join_kind::INNER_JOIN,
                             output_size_data,
-                            cudf::default_stream_value,
+                            cudf::get_default_stream(),
                             mr);
 }
 
@@ -479,7 +479,7 @@ std::pair<std::size_t, std::unique_ptr<rmm::device_uvector<size_type>>> mixed_in
                                                 binary_predicate,
                                                 compare_nulls,
                                                 detail::join_kind::INNER_JOIN,
-                                                cudf::default_stream_value,
+                                                cudf::get_default_stream(),
                                                 mr);
 }
 
@@ -504,7 +504,7 @@ mixed_left_join(
                             compare_nulls,
                             detail::join_kind::LEFT_JOIN,
                             output_size_data,
-                            cudf::default_stream_value,
+                            cudf::get_default_stream(),
                             mr);
 }
 
@@ -525,7 +525,7 @@ std::pair<std::size_t, std::unique_ptr<rmm::device_uvector<size_type>>> mixed_le
                                                 binary_predicate,
                                                 compare_nulls,
                                                 detail::join_kind::LEFT_JOIN,
-                                                cudf::default_stream_value,
+                                                cudf::get_default_stream(),
                                                 mr);
 }
 
@@ -550,7 +550,7 @@ mixed_full_join(
                             compare_nulls,
                             detail::join_kind::FULL_JOIN,
                             output_size_data,
-                            cudf::default_stream_value,
+                            cudf::get_default_stream(),
                             mr);
 }
 
diff --git a/cpp/src/join/mixed_join_semi.cu b/cpp/src/join/mixed_join_semi.cu
index a9897f0f40e..6ebf3702256 100644
--- a/cpp/src/join/mixed_join_semi.cu
+++ b/cpp/src/join/mixed_join_semi.cu
@@ -503,7 +503,7 @@ std::pair<std::size_t, std::unique_ptr<rmm::device_uvector<size_type>>> mixed_le
                                                      binary_predicate,
                                                      compare_nulls,
                                                      detail::join_kind::LEFT_SEMI_JOIN,
-                                                     cudf::default_stream_value,
+                                                     cudf::get_default_stream(),
                                                      mr);
 }
 
@@ -526,7 +526,7 @@ std::unique_ptr<rmm::device_uvector<size_type>> mixed_left_semi_join(
                                  compare_nulls,
                                  detail::join_kind::LEFT_SEMI_JOIN,
                                  output_size_data,
-                                 cudf::default_stream_value,
+                                 cudf::get_default_stream(),
                                  mr);
 }
 
@@ -547,7 +547,7 @@ std::pair<std::size_t, std::unique_ptr<rmm::device_uvector<size_type>>> mixed_le
                                                      binary_predicate,
                                                      compare_nulls,
                                                      detail::join_kind::LEFT_ANTI_JOIN,
-                                                     cudf::default_stream_value,
+                                                     cudf::get_default_stream(),
                                                      mr);
 }
 
@@ -570,7 +570,7 @@ std::unique_ptr<rmm::device_uvector<size_type>> mixed_left_anti_join(
                                  compare_nulls,
                                  detail::join_kind::LEFT_ANTI_JOIN,
                                  output_size_data,
-                                 cudf::default_stream_value,
+                                 cudf::get_default_stream(),
                                  mr);
 }
 
diff --git a/cpp/src/join/semi_join.cu b/cpp/src/join/semi_join.cu
index 87bac002f53..cc523b2ac7f 100644
--- a/cpp/src/join/semi_join.cu
+++ b/cpp/src/join/semi_join.cu
@@ -95,7 +95,7 @@ std::unique_ptr<rmm::device_uvector<cudf::size_type>> left_semi_join(
 {
   CUDF_FUNC_RANGE();
   return detail::left_semi_anti_join(
-    detail::join_kind::LEFT_SEMI_JOIN, left, right, compare_nulls, cudf::default_stream_value, mr);
+    detail::join_kind::LEFT_SEMI_JOIN, left, right, compare_nulls, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<rmm::device_uvector<cudf::size_type>> left_anti_join(
@@ -106,7 +106,7 @@ std::unique_ptr<rmm::device_uvector<cudf::size_type>> left_anti_join(
 {
   CUDF_FUNC_RANGE();
   return detail::left_semi_anti_join(
-    detail::join_kind::LEFT_ANTI_JOIN, left, right, compare_nulls, cudf::default_stream_value, mr);
+    detail::join_kind::LEFT_ANTI_JOIN, left, right, compare_nulls, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/labeling/label_bins.cu b/cpp/src/labeling/label_bins.cu
index f5e35fc842f..4c3469c679e 100644
--- a/cpp/src/labeling/label_bins.cu
+++ b/cpp/src/labeling/label_bins.cu
@@ -244,7 +244,7 @@ std::unique_ptr<column> label_bins(column_view const& input,
                             left_inclusive,
                             right_edges,
                             right_inclusive,
-                            cudf::default_stream_value,
+                            cudf::get_default_stream(),
                             mr);
 }
 }  // namespace cudf
diff --git a/cpp/src/lists/combine/concatenate_list_elements.cu b/cpp/src/lists/combine/concatenate_list_elements.cu
index c107bad018d..496d9ee670a 100644
--- a/cpp/src/lists/combine/concatenate_list_elements.cu
+++ b/cpp/src/lists/combine/concatenate_list_elements.cu
@@ -287,7 +287,7 @@ std::unique_ptr<column> concatenate_list_elements(column_view const& input,
                                                   rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::concatenate_list_elements(input, null_policy, cudf::default_stream_value, mr);
+  return detail::concatenate_list_elements(input, null_policy, cudf::get_default_stream(), mr);
 }
 
 }  // namespace lists
diff --git a/cpp/src/lists/combine/concatenate_rows.cu b/cpp/src/lists/combine/concatenate_rows.cu
index 4364470407f..0a3ff333d6c 100644
--- a/cpp/src/lists/combine/concatenate_rows.cu
+++ b/cpp/src/lists/combine/concatenate_rows.cu
@@ -307,7 +307,7 @@ std::unique_ptr<column> concatenate_rows(table_view const& input,
                                          rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::concatenate_rows(input, null_policy, cudf::default_stream_value, mr);
+  return detail::concatenate_rows(input, null_policy, cudf::get_default_stream(), mr);
 }
 
 }  // namespace lists
diff --git a/cpp/src/lists/contains.cu b/cpp/src/lists/contains.cu
index 3a52426c16a..0142e736fd0 100644
--- a/cpp/src/lists/contains.cu
+++ b/cpp/src/lists/contains.cu
@@ -495,7 +495,7 @@ std::unique_ptr<column> contains(lists_column_view const& lists,
                                  rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::contains(lists, search_key, cudf::default_stream_value, mr);
+  return detail::contains(lists, search_key, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> contains(lists_column_view const& lists,
@@ -503,14 +503,14 @@ std::unique_ptr<column> contains(lists_column_view const& lists,
                                  rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::contains(lists, search_keys, cudf::default_stream_value, mr);
+  return detail::contains(lists, search_keys, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> contains_nulls(lists_column_view const& lists,
                                        rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::contains_nulls(lists, cudf::default_stream_value, mr);
+  return detail::contains_nulls(lists, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> index_of(lists_column_view const& lists,
@@ -519,7 +519,7 @@ std::unique_ptr<column> index_of(lists_column_view const& lists,
                                  rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::index_of(lists, search_key, find_option, cudf::default_stream_value, mr);
+  return detail::index_of(lists, search_key, find_option, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> index_of(lists_column_view const& lists,
@@ -528,7 +528,7 @@ std::unique_ptr<column> index_of(lists_column_view const& lists,
                                  rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::index_of(lists, search_keys, find_option, cudf::default_stream_value, mr);
+  return detail::index_of(lists, search_keys, find_option, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf::lists
diff --git a/cpp/src/lists/copying/segmented_gather.cu b/cpp/src/lists/copying/segmented_gather.cu
index db37a82ba8e..2c12e09bcd9 100644
--- a/cpp/src/lists/copying/segmented_gather.cu
+++ b/cpp/src/lists/copying/segmented_gather.cu
@@ -120,7 +120,7 @@ std::unique_ptr<column> segmented_gather(lists_column_view const& source_column,
 {
   CUDF_FUNC_RANGE();
   return detail::segmented_gather(
-    source_column, gather_map_list, bounds_policy, cudf::default_stream_value, mr);
+    source_column, gather_map_list, bounds_policy, cudf::get_default_stream(), mr);
 }
 
 }  // namespace lists
diff --git a/cpp/src/lists/count_elements.cu b/cpp/src/lists/count_elements.cu
index 68748dfde3f..f8e7b4c6126 100644
--- a/cpp/src/lists/count_elements.cu
+++ b/cpp/src/lists/count_elements.cu
@@ -76,7 +76,7 @@ std::unique_ptr<column> count_elements(lists_column_view const& input,
                                        rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::count_elements(input, cudf::default_stream_value, mr);
+  return detail::count_elements(input, cudf::get_default_stream(), mr);
 }
 
 }  // namespace lists
diff --git a/cpp/src/lists/explode.cu b/cpp/src/lists/explode.cu
index 873b0fe408d..4db3254f201 100644
--- a/cpp/src/lists/explode.cu
+++ b/cpp/src/lists/explode.cu
@@ -299,7 +299,7 @@ std::unique_ptr<table> explode(table_view const& input_table,
   CUDF_FUNC_RANGE();
   CUDF_EXPECTS(input_table.column(explode_column_idx).type().id() == type_id::LIST,
                "Unsupported non-list column");
-  return detail::explode(input_table, explode_column_idx, cudf::default_stream_value, mr);
+  return detail::explode(input_table, explode_column_idx, cudf::get_default_stream(), mr);
 }
 
 /**
@@ -312,7 +312,7 @@ std::unique_ptr<table> explode_position(table_view const& input_table,
   CUDF_FUNC_RANGE();
   CUDF_EXPECTS(input_table.column(explode_column_idx).type().id() == type_id::LIST,
                "Unsupported non-list column");
-  return detail::explode_position(input_table, explode_column_idx, cudf::default_stream_value, mr);
+  return detail::explode_position(input_table, explode_column_idx, cudf::get_default_stream(), mr);
 }
 
 /**
@@ -326,7 +326,7 @@ std::unique_ptr<table> explode_outer(table_view const& input_table,
   CUDF_EXPECTS(input_table.column(explode_column_idx).type().id() == type_id::LIST,
                "Unsupported non-list column");
   return detail::explode_outer(
-    input_table, explode_column_idx, false, cudf::default_stream_value, mr);
+    input_table, explode_column_idx, false, cudf::get_default_stream(), mr);
 }
 
 /**
@@ -341,7 +341,7 @@ std::unique_ptr<table> explode_outer_position(table_view const& input_table,
   CUDF_EXPECTS(input_table.column(explode_column_idx).type().id() == type_id::LIST,
                "Unsupported non-list column");
   return detail::explode_outer(
-    input_table, explode_column_idx, true, cudf::default_stream_value, mr);
+    input_table, explode_column_idx, true, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/lists/extract.cu b/cpp/src/lists/extract.cu
index bc04bad7c0c..d1807c2c5ac 100644
--- a/cpp/src/lists/extract.cu
+++ b/cpp/src/lists/extract.cu
@@ -171,7 +171,7 @@ std::unique_ptr<column> extract_list_element(lists_column_view const& lists_colu
                                              rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::extract_list_element(lists_column, index, cudf::default_stream_value, mr);
+  return detail::extract_list_element(lists_column, index, cudf::get_default_stream(), mr);
 }
 
 /**
@@ -186,7 +186,7 @@ std::unique_ptr<column> extract_list_element(lists_column_view const& lists_colu
   CUDF_FUNC_RANGE();
   CUDF_EXPECTS(indices.size() == lists_column.size(),
                "Index column must have as many elements as lists column.");
-  return detail::extract_list_element(lists_column, indices, cudf::default_stream_value, mr);
+  return detail::extract_list_element(lists_column, indices, cudf::get_default_stream(), mr);
 }
 
 }  // namespace lists
diff --git a/cpp/src/lists/segmented_sort.cu b/cpp/src/lists/segmented_sort.cu
index ea35977e8e4..1a4e3ea66ed 100644
--- a/cpp/src/lists/segmented_sort.cu
+++ b/cpp/src/lists/segmented_sort.cu
@@ -328,7 +328,7 @@ std::unique_ptr<column> sort_lists(lists_column_view const& input,
                                    rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::sort_lists(input, column_order, null_precedence, cudf::default_stream_value, mr);
+  return detail::sort_lists(input, column_order, null_precedence, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> stable_sort_lists(lists_column_view const& input,
@@ -338,7 +338,7 @@ std::unique_ptr<column> stable_sort_lists(lists_column_view const& input,
 {
   CUDF_FUNC_RANGE();
   return detail::stable_sort_lists(
-    input, column_order, null_precedence, cudf::default_stream_value, mr);
+    input, column_order, null_precedence, cudf::get_default_stream(), mr);
 }
 
 }  // namespace lists
diff --git a/cpp/src/lists/sequences.cu b/cpp/src/lists/sequences.cu
index 6c2b0b1a785..bb0e669339a 100644
--- a/cpp/src/lists/sequences.cu
+++ b/cpp/src/lists/sequences.cu
@@ -214,7 +214,7 @@ std::unique_ptr<column> sequences(column_view const& starts,
                                   rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::sequences(starts, sizes, cudf::default_stream_value, mr);
+  return detail::sequences(starts, sizes, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> sequences(column_view const& starts,
@@ -223,7 +223,7 @@ std::unique_ptr<column> sequences(column_view const& starts,
                                   rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::sequences(starts, steps, sizes, cudf::default_stream_value, mr);
+  return detail::sequences(starts, steps, sizes, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf::lists
diff --git a/cpp/src/lists/set_operations.cu b/cpp/src/lists/set_operations.cu
index 00cdfcf7ff1..cc52478900a 100644
--- a/cpp/src/lists/set_operations.cu
+++ b/cpp/src/lists/set_operations.cu
@@ -267,7 +267,7 @@ std::unique_ptr<column> have_overlap(lists_column_view const& lhs,
                                      rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::have_overlap(lhs, rhs, nulls_equal, nans_equal, cudf::default_stream_value, mr);
+  return detail::have_overlap(lhs, rhs, nulls_equal, nans_equal, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> intersect_distinct(lists_column_view const& lhs,
@@ -278,7 +278,7 @@ std::unique_ptr<column> intersect_distinct(lists_column_view const& lhs,
 {
   CUDF_FUNC_RANGE();
   return detail::intersect_distinct(
-    lhs, rhs, nulls_equal, nans_equal, cudf::default_stream_value, mr);
+    lhs, rhs, nulls_equal, nans_equal, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> union_distinct(lists_column_view const& lhs,
@@ -288,7 +288,7 @@ std::unique_ptr<column> union_distinct(lists_column_view const& lhs,
                                        rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::union_distinct(lhs, rhs, nulls_equal, nans_equal, cudf::default_stream_value, mr);
+  return detail::union_distinct(lhs, rhs, nulls_equal, nans_equal, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> difference_distinct(lists_column_view const& lhs,
@@ -299,7 +299,7 @@ std::unique_ptr<column> difference_distinct(lists_column_view const& lhs,
 {
   CUDF_FUNC_RANGE();
   return detail::difference_distinct(
-    lhs, rhs, nulls_equal, nans_equal, cudf::default_stream_value, mr);
+    lhs, rhs, nulls_equal, nans_equal, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf::lists
diff --git a/cpp/src/lists/stream_compaction/apply_boolean_mask.cu b/cpp/src/lists/stream_compaction/apply_boolean_mask.cu
index c99486ca8b0..c1c17dc0688 100644
--- a/cpp/src/lists/stream_compaction/apply_boolean_mask.cu
+++ b/cpp/src/lists/stream_compaction/apply_boolean_mask.cu
@@ -104,7 +104,7 @@ std::unique_ptr<column> apply_boolean_mask(lists_column_view const& input,
                                            rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::apply_boolean_mask(input, boolean_mask, cudf::default_stream_value, mr);
+  return detail::apply_boolean_mask(input, boolean_mask, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf::lists
diff --git a/cpp/src/lists/stream_compaction/distinct.cu b/cpp/src/lists/stream_compaction/distinct.cu
index c88209292de..d0e4557663e 100644
--- a/cpp/src/lists/stream_compaction/distinct.cu
+++ b/cpp/src/lists/stream_compaction/distinct.cu
@@ -78,7 +78,7 @@ std::unique_ptr<column> distinct(lists_column_view const& input,
                                  rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::distinct(input, nulls_equal, nans_equal, cudf::default_stream_value, mr);
+  return detail::distinct(input, nulls_equal, nans_equal, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf::lists
diff --git a/cpp/src/merge/merge.cu b/cpp/src/merge/merge.cu
index 91018d3f006..d9c573e8155 100644
--- a/cpp/src/merge/merge.cu
+++ b/cpp/src/merge/merge.cu
@@ -171,7 +171,7 @@ index_vector generate_merged_indices(table_view const& left_table,
                                      std::vector<order> const& column_order,
                                      std::vector<null_order> const& null_precedence,
                                      bool nullable                = true,
-                                     rmm::cuda_stream_view stream = cudf::default_stream_value)
+                                     rmm::cuda_stream_view stream = cudf::get_default_stream())
 {
   const size_type left_size  = left_table.num_rows();
   const size_type right_size = right_table.num_rows();
@@ -540,7 +540,7 @@ std::unique_ptr<cudf::table> merge(std::vector<table_view> const& tables_to_merg
 {
   CUDF_FUNC_RANGE();
   return detail::merge(
-    tables_to_merge, key_cols, column_order, null_precedence, cudf::default_stream_value, mr);
+    tables_to_merge, key_cols, column_order, null_precedence, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/partitioning/partitioning.cu b/cpp/src/partitioning/partitioning.cu
index 296a9f40fbb..e4d366e7d01 100644
--- a/cpp/src/partitioning/partitioning.cu
+++ b/cpp/src/partitioning/partitioning.cu
@@ -797,7 +797,7 @@ std::pair<std::unique_ptr<table>, std::vector<size_type>> partition(
   rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::partition(t, partition_map, num_partitions, cudf::default_stream_value, mr);
+  return detail::partition(t, partition_map, num_partitions, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/partitioning/round_robin.cu b/cpp/src/partitioning/round_robin.cu
index 85bd31a20ea..990992cd8f2 100644
--- a/cpp/src/partitioning/round_robin.cu
+++ b/cpp/src/partitioning/round_robin.cu
@@ -152,7 +152,7 @@ std::pair<std::unique_ptr<table>, std::vector<cudf::size_type>> round_robin_part
   table_view const& input,
   cudf::size_type num_partitions,
   cudf::size_type start_partition     = 0,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   auto nrows = input.num_rows();
@@ -271,7 +271,7 @@ std::pair<std::unique_ptr<cudf::table>, std::vector<cudf::size_type>> round_robi
 {
   CUDF_FUNC_RANGE();
   return detail::round_robin_partition(
-    input, num_partitions, start_partition, cudf::default_stream_value, mr);
+    input, num_partitions, start_partition, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/quantiles/quantile.cu b/cpp/src/quantiles/quantile.cu
index 1fe9809d922..1f1941529c9 100644
--- a/cpp/src/quantiles/quantile.cu
+++ b/cpp/src/quantiles/quantile.cu
@@ -189,7 +189,7 @@ std::unique_ptr<column> quantile(column_view const& input,
                                  rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::quantile(input, q, interp, ordered_indices, exact, cudf::default_stream_value, mr);
+  return detail::quantile(input, q, interp, ordered_indices, exact, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/quantiles/quantiles.cu b/cpp/src/quantiles/quantiles.cu
index c6957482f05..e3e19eaeec4 100644
--- a/cpp/src/quantiles/quantiles.cu
+++ b/cpp/src/quantiles/quantiles.cu
@@ -83,12 +83,12 @@ std::unique_ptr<table> quantiles(table_view const& input,
                              thrust::make_counting_iterator<size_type>(0),
                              q,
                              interp,
-                             cudf::default_stream_value,
+                             cudf::get_default_stream(),
                              mr);
   } else {
     auto sorted_idx = detail::sorted_order(input, column_order, null_precedence);
     return detail::quantiles(
-      input, sorted_idx->view().data<size_type>(), q, interp, cudf::default_stream_value, mr);
+      input, sorted_idx->view().data<size_type>(), q, interp, cudf::get_default_stream(), mr);
   }
 }
 
@@ -109,7 +109,7 @@ std::unique_ptr<table> quantiles(table_view const& input,
                            is_input_sorted,
                            column_order,
                            null_precedence,
-                           cudf::default_stream_value,
+                           cudf::get_default_stream(),
                            mr);
 }
 
diff --git a/cpp/src/quantiles/tdigest/tdigest.cu b/cpp/src/quantiles/tdigest/tdigest.cu
index a11d7ab6646..019809d5f68 100644
--- a/cpp/src/quantiles/tdigest/tdigest.cu
+++ b/cpp/src/quantiles/tdigest/tdigest.cu
@@ -407,7 +407,7 @@ std::unique_ptr<column> percentile_approx(tdigest_column_view const& input,
                                           rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::percentile_approx(input, percentiles, cudf::default_stream_value, mr);
+  return detail::percentile_approx(input, percentiles, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/reductions/minmax.cu b/cpp/src/reductions/minmax.cu
index e69942552ff..603e13c1894 100644
--- a/cpp/src/reductions/minmax.cu
+++ b/cpp/src/reductions/minmax.cu
@@ -277,7 +277,7 @@ std::pair<std::unique_ptr<scalar>, std::unique_ptr<scalar>> minmax(
   const column_view& col, rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::minmax(col, cudf::default_stream_value, mr);
+  return detail::minmax(col, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp
index 523865e0df0..4166becbf4d 100644
--- a/cpp/src/reductions/reductions.cpp
+++ b/cpp/src/reductions/reductions.cpp
@@ -143,7 +143,7 @@ std::unique_ptr<scalar> reduce(
   std::unique_ptr<reduce_aggregation> const& agg,
   data_type output_dtype,
   std::optional<std::reference_wrapper<scalar const>> init,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   CUDF_EXPECTS(!init.has_value() || col.type() == init.value().get().type(),
@@ -186,7 +186,7 @@ std::unique_ptr<scalar> reduce(column_view const& col,
                                rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::reduce(col, agg, output_dtype, std::nullopt, cudf::default_stream_value, mr);
+  return detail::reduce(col, agg, output_dtype, std::nullopt, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<scalar> reduce(column_view const& col,
@@ -196,6 +196,6 @@ std::unique_ptr<scalar> reduce(column_view const& col,
                                rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::reduce(col, agg, output_dtype, init, cudf::default_stream_value, mr);
+  return detail::reduce(col, agg, output_dtype, init, cudf::get_default_stream(), mr);
 }
 }  // namespace cudf
diff --git a/cpp/src/reductions/scan/scan.cpp b/cpp/src/reductions/scan/scan.cpp
index 85c0f7ea13f..c0b787b3a1d 100644
--- a/cpp/src/reductions/scan/scan.cpp
+++ b/cpp/src/reductions/scan/scan.cpp
@@ -61,7 +61,7 @@ std::unique_ptr<column> scan(column_view const& input,
                              rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::scan(input, agg, inclusive, null_handling, cudf::default_stream_value, mr);
+  return detail::scan(input, agg, inclusive, null_handling, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/reductions/segmented_reductions.cpp b/cpp/src/reductions/segmented_reductions.cpp
index d87644e7126..04a83217469 100644
--- a/cpp/src/reductions/segmented_reductions.cpp
+++ b/cpp/src/reductions/segmented_reductions.cpp
@@ -133,7 +133,7 @@ std::unique_ptr<column> segmented_reduce(column_view const& segmented_values,
                                   output_dtype,
                                   null_handling,
                                   std::nullopt,
-                                  cudf::default_stream_value,
+                                  cudf::get_default_stream(),
                                   mr);
 }
 
@@ -152,7 +152,7 @@ std::unique_ptr<column> segmented_reduce(column_view const& segmented_values,
                                   output_dtype,
                                   null_handling,
                                   init,
-                                  cudf::default_stream_value,
+                                  cudf::get_default_stream(),
                                   mr);
 }
 
diff --git a/cpp/src/replace/clamp.cu b/cpp/src/replace/clamp.cu
index f5e0ca3b3ef..24822cc6c65 100644
--- a/cpp/src/replace/clamp.cu
+++ b/cpp/src/replace/clamp.cu
@@ -391,7 +391,7 @@ std::unique_ptr<column> clamp(column_view const& input,
                               rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::clamp(input, lo, lo_replace, hi, hi_replace, cudf::default_stream_value, mr);
+  return detail::clamp(input, lo, lo_replace, hi, hi_replace, cudf::get_default_stream(), mr);
 }
 
 // clamp input at lo and hi
@@ -401,6 +401,6 @@ std::unique_ptr<column> clamp(column_view const& input,
                               rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::clamp(input, lo, lo, hi, hi, cudf::default_stream_value, mr);
+  return detail::clamp(input, lo, lo, hi, hi, cudf::get_default_stream(), mr);
 }
 }  // namespace cudf
diff --git a/cpp/src/replace/nans.cu b/cpp/src/replace/nans.cu
index 47776422adb..ce0d2d07b36 100644
--- a/cpp/src/replace/nans.cu
+++ b/cpp/src/replace/nans.cu
@@ -114,7 +114,7 @@ std::unique_ptr<column> replace_nans(column_view const& input,
                                      rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::replace_nans(input, replacement, cudf::default_stream_value, mr);
+  return detail::replace_nans(input, replacement, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> replace_nans(column_view const& input,
@@ -122,7 +122,7 @@ std::unique_ptr<column> replace_nans(column_view const& input,
                                      rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::replace_nans(input, replacement, cudf::default_stream_value, mr);
+  return detail::replace_nans(input, replacement, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
@@ -224,7 +224,7 @@ std::unique_ptr<column> normalize_nans_and_zeros(column_view const& input,
                                                  rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::normalize_nans_and_zeros(input, cudf::default_stream_value, mr);
+  return detail::normalize_nans_and_zeros(input, cudf::get_default_stream(), mr);
 }
 
 /**
@@ -240,7 +240,7 @@ std::unique_ptr<column> normalize_nans_and_zeros(column_view const& input,
 void normalize_nans_and_zeros(mutable_column_view& in_out)
 {
   CUDF_FUNC_RANGE();
-  detail::normalize_nans_and_zeros(in_out, cudf::default_stream_value);
+  detail::normalize_nans_and_zeros(in_out, cudf::get_default_stream());
 }
 
 }  // namespace cudf
diff --git a/cpp/src/replace/nulls.cu b/cpp/src/replace/nulls.cu
index 232392db0c6..d2d524ef9ba 100644
--- a/cpp/src/replace/nulls.cu
+++ b/cpp/src/replace/nulls.cu
@@ -453,7 +453,7 @@ std::unique_ptr<cudf::column> replace_nulls(cudf::column_view const& input,
                                             rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::replace_nulls(input, replacement, cudf::default_stream_value, mr);
+  return detail::replace_nulls(input, replacement, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<cudf::column> replace_nulls(cudf::column_view const& input,
@@ -461,7 +461,7 @@ std::unique_ptr<cudf::column> replace_nulls(cudf::column_view const& input,
                                             rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::replace_nulls(input, replacement, cudf::default_stream_value, mr);
+  return detail::replace_nulls(input, replacement, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<cudf::column> replace_nulls(column_view const& input,
@@ -469,7 +469,7 @@ std::unique_ptr<cudf::column> replace_nulls(column_view const& input,
                                             rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::replace_nulls(input, replace_policy, cudf::default_stream_value, mr);
+  return detail::replace_nulls(input, replace_policy, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/replace/replace.cu b/cpp/src/replace/replace.cu
index b6048333bc9..2a675c00b48 100644
--- a/cpp/src/replace/replace.cu
+++ b/cpp/src/replace/replace.cu
@@ -531,6 +531,6 @@ std::unique_ptr<cudf::column> find_and_replace_all(cudf::column_view const& inpu
                                                    rmm::mr::device_memory_resource* mr)
 {
   return detail::find_and_replace_all(
-    input_col, values_to_replace, replacement_values, cudf::default_stream_value, mr);
+    input_col, values_to_replace, replacement_values, cudf::get_default_stream(), mr);
 }
 }  // namespace cudf
diff --git a/cpp/src/reshape/byte_cast.cu b/cpp/src/reshape/byte_cast.cu
index 3d0510e1e6b..227ad2dad9c 100644
--- a/cpp/src/reshape/byte_cast.cu
+++ b/cpp/src/reshape/byte_cast.cu
@@ -147,7 +147,7 @@ std::unique_ptr<column> byte_cast(column_view const& input_column,
                                   rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::byte_cast(input_column, endian_configuration, cudf::default_stream_value, mr);
+  return detail::byte_cast(input_column, endian_configuration, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/reshape/interleave_columns.cu b/cpp/src/reshape/interleave_columns.cu
index 3a3397dc1d5..bf316ea20bf 100644
--- a/cpp/src/reshape/interleave_columns.cu
+++ b/cpp/src/reshape/interleave_columns.cu
@@ -294,7 +294,7 @@ std::unique_ptr<column> interleave_columns(table_view const& input,
                                            rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::interleave_columns(input, cudf::default_stream_value, mr);
+  return detail::interleave_columns(input, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/reshape/tile.cu b/cpp/src/reshape/tile.cu
index 95358ddab01..18174ef1001 100644
--- a/cpp/src/reshape/tile.cu
+++ b/cpp/src/reshape/tile.cu
@@ -65,7 +65,7 @@ std::unique_ptr<table> tile(const table_view& in,
                             rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::tile(in, count, cudf::default_stream_value, mr);
+  return detail::tile(in, count, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/rolling/detail/range_window_bounds.hpp b/cpp/src/rolling/detail/range_window_bounds.hpp
index 266f397b1e3..506bd54e5eb 100644
--- a/cpp/src/rolling/detail/range_window_bounds.hpp
+++ b/cpp/src/rolling/detail/range_window_bounds.hpp
@@ -149,7 +149,7 @@ template <typename OrderByType>
 range_rep_type<OrderByType> range_comparable_value(
   range_window_bounds const& range_bounds,
   data_type const& order_by_data_type = data_type{type_to_id<OrderByType>()},
-  rmm::cuda_stream_view stream        = cudf::default_stream_value)
+  rmm::cuda_stream_view stream        = cudf::get_default_stream())
 {
   auto const& range_scalar = range_bounds.range_scalar();
   using range_type         = cudf::detail::range_type<OrderByType>;
diff --git a/cpp/src/rolling/grouped_rolling.cu b/cpp/src/rolling/grouped_rolling.cu
index c1be33a9cd5..960dbfb9dfe 100644
--- a/cpp/src/rolling/grouped_rolling.cu
+++ b/cpp/src/rolling/grouped_rolling.cu
@@ -211,7 +211,7 @@ std::unique_ptr<column> grouped_rolling_window(table_view const& group_keys,
                                         following_window_bounds,
                                         min_periods,
                                         aggr,
-                                        cudf::default_stream_value,
+                                        cudf::get_default_stream(),
                                         mr);
 }
 
@@ -1049,7 +1049,7 @@ std::unique_ptr<column> grouped_time_range_rolling_window(table_view const& grou
                                               following,
                                               min_periods,
                                               aggr,
-                                              cudf::default_stream_value,
+                                              cudf::get_default_stream(),
                                               mr);
 }
 
@@ -1089,7 +1089,7 @@ std::unique_ptr<column> grouped_time_range_rolling_window(table_view const& grou
                                               following,
                                               min_periods,
                                               aggr,
-                                              cudf::default_stream_value,
+                                              cudf::get_default_stream(),
                                               mr);
 }
 
@@ -1124,7 +1124,7 @@ std::unique_ptr<column> grouped_range_rolling_window(table_view const& group_key
                                               following,
                                               min_periods,
                                               aggr,
-                                              cudf::default_stream_value,
+                                              cudf::get_default_stream(),
                                               mr);
 }
 
diff --git a/cpp/src/rolling/rolling.cu b/cpp/src/rolling/rolling.cu
index f11eaad351d..d699d7bea85 100644
--- a/cpp/src/rolling/rolling.cu
+++ b/cpp/src/rolling/rolling.cu
@@ -41,7 +41,7 @@ std::unique_ptr<column> rolling_window(column_view const& input,
                                 following_window,
                                 min_periods,
                                 agg,
-                                cudf::default_stream_value,
+                                cudf::get_default_stream(),
                                 mr);
 }
 
@@ -62,7 +62,7 @@ std::unique_ptr<column> rolling_window(column_view const& input,
                                 following_window,
                                 min_periods,
                                 agg,
-                                cudf::default_stream_value,
+                                cudf::get_default_stream(),
                                 mr);
 }
 
@@ -76,7 +76,7 @@ std::unique_ptr<column> rolling_window(column_view const& input,
 {
   CUDF_FUNC_RANGE();
   return detail::rolling_window(
-    input, preceding_window, following_window, min_periods, agg, cudf::default_stream_value, mr);
+    input, preceding_window, following_window, min_periods, agg, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/round/round.cu b/cpp/src/round/round.cu
index c60ce7295fb..58e21fc97ab 100644
--- a/cpp/src/round/round.cu
+++ b/cpp/src/round/round.cu
@@ -348,7 +348,7 @@ std::unique_ptr<column> round(column_view const& input,
                               rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::round(input, decimal_places, method, cudf::default_stream_value, mr);
+  return detail::round(input, decimal_places, method, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/scalar/scalar.cpp b/cpp/src/scalar/scalar.cpp
index 2af3867cca7..0fe04ed1305 100644
--- a/cpp/src/scalar/scalar.cpp
+++ b/cpp/src/scalar/scalar.cpp
@@ -109,7 +109,7 @@ size_type string_scalar::size() const { return _data.size(); }
 
 const char* string_scalar::data() const { return static_cast<const char*>(_data.data()); }
 
-string_scalar::operator std::string() const { return this->to_string(cudf::default_stream_value); }
+string_scalar::operator std::string() const { return this->to_string(cudf::get_default_stream()); }
 
 std::string string_scalar::to_string(rmm::cuda_stream_view stream) const
 {
@@ -186,7 +186,7 @@ T fixed_point_scalar<T>::fixed_point_value(rmm::cuda_stream_view stream) const
 template <typename T>
 fixed_point_scalar<T>::operator value_type() const
 {
-  return this->fixed_point_value(cudf::default_stream_value);
+  return this->fixed_point_value(cudf::get_default_stream());
 }
 
 template <typename T>
@@ -269,7 +269,7 @@ T const* fixed_width_scalar<T>::data() const
 template <typename T>
 fixed_width_scalar<T>::operator value_type() const
 {
-  return this->value(cudf::default_stream_value);
+  return this->value(cudf::get_default_stream());
 }
 
 /**
diff --git a/cpp/src/search/contains_column.cu b/cpp/src/search/contains_column.cu
index c7631385270..31edf88a8cf 100644
--- a/cpp/src/search/contains_column.cu
+++ b/cpp/src/search/contains_column.cu
@@ -155,7 +155,7 @@ std::unique_ptr<column> contains(column_view const& haystack,
                                  rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::contains(haystack, needles, cudf::default_stream_value, mr);
+  return detail::contains(haystack, needles, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/search/contains_scalar.cu b/cpp/src/search/contains_scalar.cu
index 11c47c769fb..59c7a86d29c 100644
--- a/cpp/src/search/contains_scalar.cu
+++ b/cpp/src/search/contains_scalar.cu
@@ -153,7 +153,7 @@ bool contains(column_view const& haystack, scalar const& needle, rmm::cuda_strea
 bool contains(column_view const& haystack, scalar const& needle)
 {
   CUDF_FUNC_RANGE();
-  return detail::contains(haystack, needle, cudf::default_stream_value);
+  return detail::contains(haystack, needle, cudf::get_default_stream());
 }
 
 }  // namespace cudf
diff --git a/cpp/src/search/search_ordered.cu b/cpp/src/search/search_ordered.cu
index 754a17dc6d8..1da8d2313e6 100644
--- a/cpp/src/search/search_ordered.cu
+++ b/cpp/src/search/search_ordered.cu
@@ -147,7 +147,7 @@ std::unique_ptr<column> lower_bound(table_view const& haystack,
 {
   CUDF_FUNC_RANGE();
   return detail::lower_bound(
-    haystack, needles, column_order, null_precedence, cudf::default_stream_value, mr);
+    haystack, needles, column_order, null_precedence, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> upper_bound(table_view const& haystack,
@@ -158,7 +158,7 @@ std::unique_ptr<column> upper_bound(table_view const& haystack,
 {
   CUDF_FUNC_RANGE();
   return detail::upper_bound(
-    haystack, needles, column_order, null_precedence, cudf::default_stream_value, mr);
+    haystack, needles, column_order, null_precedence, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/sort/is_sorted.cu b/cpp/src/sort/is_sorted.cu
index 43f0a17ab27..459dcf5467f 100644
--- a/cpp/src/sort/is_sorted.cu
+++ b/cpp/src/sort/is_sorted.cu
@@ -84,7 +84,7 @@ bool is_sorted(cudf::table_view const& in,
   }
 
   return detail::is_sorted(
-    in, column_order, has_nulls(in), null_precedence, cudf::default_stream_value);
+    in, column_order, has_nulls(in), null_precedence, cudf::get_default_stream());
 }
 
 }  // namespace cudf
diff --git a/cpp/src/sort/rank.cu b/cpp/src/sort/rank.cu
index bcb9244231d..99e99704c10 100644
--- a/cpp/src/sort/rank.cu
+++ b/cpp/src/sort/rank.cu
@@ -352,7 +352,7 @@ std::unique_ptr<column> rank(column_view const& input,
                       null_handling,
                       null_precedence,
                       percentage,
-                      cudf::default_stream_value,
+                      cudf::get_default_stream(),
                       mr);
 }
 }  // namespace cudf
diff --git a/cpp/src/sort/segmented_sort.cu b/cpp/src/sort/segmented_sort.cu
index c5f13df5305..20017eda402 100644
--- a/cpp/src/sort/segmented_sort.cu
+++ b/cpp/src/sort/segmented_sort.cu
@@ -220,7 +220,7 @@ std::unique_ptr<column> segmented_sorted_order(table_view const& keys,
 {
   CUDF_FUNC_RANGE();
   return detail::segmented_sorted_order(
-    keys, segment_offsets, column_order, null_precedence, cudf::default_stream_value, mr);
+    keys, segment_offsets, column_order, null_precedence, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> stable_segmented_sorted_order(
@@ -232,7 +232,7 @@ std::unique_ptr<column> stable_segmented_sorted_order(
 {
   CUDF_FUNC_RANGE();
   return detail::stable_segmented_sorted_order(
-    keys, segment_offsets, column_order, null_precedence, cudf::default_stream_value, mr);
+    keys, segment_offsets, column_order, null_precedence, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<table> segmented_sort_by_key(table_view const& values,
@@ -244,7 +244,7 @@ std::unique_ptr<table> segmented_sort_by_key(table_view const& values,
 {
   CUDF_FUNC_RANGE();
   return detail::segmented_sort_by_key(
-    values, keys, segment_offsets, column_order, null_precedence, cudf::default_stream_value, mr);
+    values, keys, segment_offsets, column_order, null_precedence, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<table> stable_segmented_sort_by_key(table_view const& values,
@@ -256,7 +256,7 @@ std::unique_ptr<table> stable_segmented_sort_by_key(table_view const& values,
 {
   CUDF_FUNC_RANGE();
   return detail::stable_segmented_sort_by_key(
-    values, keys, segment_offsets, column_order, null_precedence, cudf::default_stream_value, mr);
+    values, keys, segment_offsets, column_order, null_precedence, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/sort/sort.cu b/cpp/src/sort/sort.cu
index 5089f233916..34041bddeb8 100644
--- a/cpp/src/sort/sort.cu
+++ b/cpp/src/sort/sort.cu
@@ -100,7 +100,7 @@ std::unique_ptr<table> sort(table_view const& input,
     return std::make_unique<table>(std::move(columns));
   }
   return detail::sort_by_key(
-    input, input, column_order, null_precedence, cudf::default_stream_value, mr);
+    input, input, column_order, null_precedence, cudf::get_default_stream(), mr);
 }
 
 }  // namespace detail
@@ -111,7 +111,7 @@ std::unique_ptr<column> sorted_order(table_view const& input,
                                      rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::sorted_order(input, column_order, null_precedence, cudf::default_stream_value, mr);
+  return detail::sorted_order(input, column_order, null_precedence, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<table> sort(table_view const& input,
@@ -120,7 +120,7 @@ std::unique_ptr<table> sort(table_view const& input,
                             rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::sort(input, column_order, null_precedence, cudf::default_stream_value, mr);
+  return detail::sort(input, column_order, null_precedence, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<table> sort_by_key(table_view const& values,
@@ -131,7 +131,7 @@ std::unique_ptr<table> sort_by_key(table_view const& values,
 {
   CUDF_FUNC_RANGE();
   return detail::sort_by_key(
-    values, keys, column_order, null_precedence, cudf::default_stream_value, mr);
+    values, keys, column_order, null_precedence, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/sort/stable_sort.cu b/cpp/src/sort/stable_sort.cu
index a7e5d86ded0..ff2cb871162 100644
--- a/cpp/src/sort/stable_sort.cu
+++ b/cpp/src/sort/stable_sort.cu
@@ -65,7 +65,7 @@ std::unique_ptr<column> stable_sorted_order(table_view const& input,
 {
   CUDF_FUNC_RANGE();
   return detail::stable_sorted_order(
-    input, column_order, null_precedence, cudf::default_stream_value, mr);
+    input, column_order, null_precedence, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<table> stable_sort_by_key(table_view const& values,
@@ -76,7 +76,7 @@ std::unique_ptr<table> stable_sort_by_key(table_view const& values,
 {
   CUDF_FUNC_RANGE();
   return detail::stable_sort_by_key(
-    values, keys, column_order, null_precedence, cudf::default_stream_value, mr);
+    values, keys, column_order, null_precedence, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/stream_compaction/apply_boolean_mask.cu b/cpp/src/stream_compaction/apply_boolean_mask.cu
index 54688672d20..8f707f6d15d 100644
--- a/cpp/src/stream_compaction/apply_boolean_mask.cu
+++ b/cpp/src/stream_compaction/apply_boolean_mask.cu
@@ -93,6 +93,6 @@ std::unique_ptr<table> apply_boolean_mask(table_view const& input,
                                           rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::apply_boolean_mask(input, boolean_mask, cudf::default_stream_value, mr);
+  return detail::apply_boolean_mask(input, boolean_mask, cudf::get_default_stream(), mr);
 }
 }  // namespace cudf
diff --git a/cpp/src/stream_compaction/distinct.cu b/cpp/src/stream_compaction/distinct.cu
index a03e4c4441a..02889d4f447 100644
--- a/cpp/src/stream_compaction/distinct.cu
+++ b/cpp/src/stream_compaction/distinct.cu
@@ -159,7 +159,7 @@ std::unique_ptr<table> distinct(table_view const& input,
 {
   CUDF_FUNC_RANGE();
   return detail::distinct(
-    input, keys, keep, nulls_equal, nans_equal, cudf::default_stream_value, mr);
+    input, keys, keep, nulls_equal, nans_equal, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/stream_compaction/drop_nans.cu b/cpp/src/stream_compaction/drop_nans.cu
index 4429c952277..a645b46f7a7 100644
--- a/cpp/src/stream_compaction/drop_nans.cu
+++ b/cpp/src/stream_compaction/drop_nans.cu
@@ -119,7 +119,7 @@ std::unique_ptr<table> drop_nans(table_view const& input,
                                  rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::drop_nans(input, keys, keep_threshold, cudf::default_stream_value, mr);
+  return detail::drop_nans(input, keys, keep_threshold, cudf::get_default_stream(), mr);
 }
 /*
  * Filters a table to remove nan elements.
@@ -129,7 +129,7 @@ std::unique_ptr<table> drop_nans(table_view const& input,
                                  rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::drop_nans(input, keys, keys.size(), cudf::default_stream_value, mr);
+  return detail::drop_nans(input, keys, keys.size(), cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/stream_compaction/drop_nulls.cu b/cpp/src/stream_compaction/drop_nulls.cu
index c5f3e0df1e2..6ea1fd4c31f 100644
--- a/cpp/src/stream_compaction/drop_nulls.cu
+++ b/cpp/src/stream_compaction/drop_nulls.cu
@@ -92,7 +92,7 @@ std::unique_ptr<table> drop_nulls(table_view const& input,
                                   rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::drop_nulls(input, keys, keep_threshold, cudf::default_stream_value, mr);
+  return detail::drop_nulls(input, keys, keep_threshold, cudf::get_default_stream(), mr);
 }
 /*
  * Filters a table to remove null elements.
@@ -102,7 +102,7 @@ std::unique_ptr<table> drop_nulls(table_view const& input,
                                   rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::drop_nulls(input, keys, keys.size(), cudf::default_stream_value, mr);
+  return detail::drop_nulls(input, keys, keys.size(), cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/stream_compaction/unique.cu b/cpp/src/stream_compaction/unique.cu
index 83c51a92633..6b432176acb 100644
--- a/cpp/src/stream_compaction/unique.cu
+++ b/cpp/src/stream_compaction/unique.cu
@@ -99,7 +99,7 @@ std::unique_ptr<table> unique(table_view const& input,
                               rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::unique(input, keys, keep, nulls_equal, cudf::default_stream_value, mr);
+  return detail::unique(input, keys, keep, nulls_equal, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/strings/attributes.cu b/cpp/src/strings/attributes.cu
index 0dd1a870b8a..ea01b570b91 100644
--- a/cpp/src/strings/attributes.cu
+++ b/cpp/src/strings/attributes.cu
@@ -185,21 +185,21 @@ std::unique_ptr<column> count_characters(strings_column_view const& strings,
                                          rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::count_characters(strings, cudf::default_stream_value, mr);
+  return detail::count_characters(strings, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> count_bytes(strings_column_view const& strings,
                                     rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::count_bytes(strings, cudf::default_stream_value, mr);
+  return detail::count_bytes(strings, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> code_points(strings_column_view const& strings,
                                     rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::code_points(strings, cudf::default_stream_value, mr);
+  return detail::code_points(strings, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/capitalize.cu b/cpp/src/strings/capitalize.cu
index 4328765773f..58134ab28d1 100644
--- a/cpp/src/strings/capitalize.cu
+++ b/cpp/src/strings/capitalize.cu
@@ -289,7 +289,7 @@ std::unique_ptr<column> capitalize(strings_column_view const& input,
                                    rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::capitalize(input, delimiter, cudf::default_stream_value, mr);
+  return detail::capitalize(input, delimiter, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> title(strings_column_view const& input,
@@ -297,14 +297,14 @@ std::unique_ptr<column> title(strings_column_view const& input,
                               rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::title(input, sequence_type, cudf::default_stream_value, mr);
+  return detail::title(input, sequence_type, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> is_title(strings_column_view const& input,
                                  rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::is_title(input, cudf::default_stream_value, mr);
+  return detail::is_title(input, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/case.cu b/cpp/src/strings/case.cu
index cabb1241f1b..05c2904ec9e 100644
--- a/cpp/src/strings/case.cu
+++ b/cpp/src/strings/case.cu
@@ -185,21 +185,21 @@ std::unique_ptr<column> to_lower(strings_column_view const& strings,
                                  rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::to_lower(strings, cudf::default_stream_value, mr);
+  return detail::to_lower(strings, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> to_upper(strings_column_view const& strings,
                                  rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::to_upper(strings, cudf::default_stream_value, mr);
+  return detail::to_upper(strings, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> swapcase(strings_column_view const& strings,
                                  rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::swapcase(strings, cudf::default_stream_value, mr);
+  return detail::swapcase(strings, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/char_types/char_types.cu b/cpp/src/strings/char_types/char_types.cu
index 4010ec8861a..0426d82c6c6 100644
--- a/cpp/src/strings/char_types/char_types.cu
+++ b/cpp/src/strings/char_types/char_types.cu
@@ -197,7 +197,7 @@ std::unique_ptr<column> all_characters_of_type(strings_column_view const& string
 {
   CUDF_FUNC_RANGE();
   return detail::all_characters_of_type(
-    strings, types, verify_types, cudf::default_stream_value, mr);
+    strings, types, verify_types, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> filter_characters_of_type(strings_column_view const& strings,
@@ -208,7 +208,7 @@ std::unique_ptr<column> filter_characters_of_type(strings_column_view const& str
 {
   CUDF_FUNC_RANGE();
   return detail::filter_characters_of_type(
-    strings, types_to_remove, replacement, types_to_keep, cudf::default_stream_value, mr);
+    strings, types_to_remove, replacement, types_to_keep, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/combine/concatenate.cu b/cpp/src/strings/combine/concatenate.cu
index ae94348cbb4..e98ae537ddd 100644
--- a/cpp/src/strings/combine/concatenate.cu
+++ b/cpp/src/strings/combine/concatenate.cu
@@ -270,7 +270,7 @@ std::unique_ptr<column> concatenate(table_view const& strings_columns,
 {
   CUDF_FUNC_RANGE();
   return detail::concatenate(
-    strings_columns, separator, narep, separate_nulls, cudf::default_stream_value, mr);
+    strings_columns, separator, narep, separate_nulls, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> concatenate(table_view const& strings_columns,
@@ -286,7 +286,7 @@ std::unique_ptr<column> concatenate(table_view const& strings_columns,
                              separator_narep,
                              col_narep,
                              separate_nulls,
-                             cudf::default_stream_value,
+                             cudf::get_default_stream(),
                              mr);
 }
 
diff --git a/cpp/src/strings/combine/join.cu b/cpp/src/strings/combine/join.cu
index f450ce4019e..6537ce168e5 100644
--- a/cpp/src/strings/combine/join.cu
+++ b/cpp/src/strings/combine/join.cu
@@ -135,7 +135,7 @@ std::unique_ptr<column> join_strings(strings_column_view const& strings,
                                      rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::join_strings(strings, separator, narep, cudf::default_stream_value, mr);
+  return detail::join_strings(strings, separator, narep, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/combine/join_list_elements.cu b/cpp/src/strings/combine/join_list_elements.cu
index 1d0ee94d306..ec2e65d7ad5 100644
--- a/cpp/src/strings/combine/join_list_elements.cu
+++ b/cpp/src/strings/combine/join_list_elements.cu
@@ -308,7 +308,7 @@ std::unique_ptr<column> join_list_elements(lists_column_view const& lists_string
                                     narep,
                                     separate_nulls,
                                     empty_list_policy,
-                                    cudf::default_stream_value,
+                                    cudf::get_default_stream(),
                                     mr);
 }
 
@@ -327,7 +327,7 @@ std::unique_ptr<column> join_list_elements(lists_column_view const& lists_string
                                     string_narep,
                                     separate_nulls,
                                     empty_list_policy,
-                                    cudf::default_stream_value,
+                                    cudf::get_default_stream(),
                                     mr);
 }
 
diff --git a/cpp/src/strings/contains.cu b/cpp/src/strings/contains.cu
index b7d154c4808..c6e71b00809 100644
--- a/cpp/src/strings/contains.cu
+++ b/cpp/src/strings/contains.cu
@@ -136,7 +136,7 @@ std::unique_ptr<column> contains_re(strings_column_view const& strings,
                                     rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::contains_re(strings, pattern, flags, cudf::default_stream_value, mr);
+  return detail::contains_re(strings, pattern, flags, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> matches_re(strings_column_view const& strings,
@@ -145,7 +145,7 @@ std::unique_ptr<column> matches_re(strings_column_view const& strings,
                                    rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::matches_re(strings, pattern, flags, cudf::default_stream_value, mr);
+  return detail::matches_re(strings, pattern, flags, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> count_re(strings_column_view const& strings,
@@ -154,7 +154,7 @@ std::unique_ptr<column> count_re(strings_column_view const& strings,
                                  rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::count_re(strings, pattern, flags, cudf::default_stream_value, mr);
+  return detail::count_re(strings, pattern, flags, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/convert/convert_booleans.cu b/cpp/src/strings/convert/convert_booleans.cu
index 196929a9377..da4728da331 100644
--- a/cpp/src/strings/convert/convert_booleans.cu
+++ b/cpp/src/strings/convert/convert_booleans.cu
@@ -86,7 +86,7 @@ std::unique_ptr<column> to_booleans(strings_column_view const& strings,
                                     rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::to_booleans(strings, true_string, cudf::default_stream_value, mr);
+  return detail::to_booleans(strings, true_string, cudf::get_default_stream(), mr);
 }
 
 namespace detail {
@@ -155,7 +155,7 @@ std::unique_ptr<column> from_booleans(column_view const& booleans,
                                       rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::from_booleans(booleans, true_string, false_string, cudf::default_stream_value, mr);
+  return detail::from_booleans(booleans, true_string, false_string, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/convert/convert_datetime.cu b/cpp/src/strings/convert/convert_datetime.cu
index e70ae09de84..0cc2ef341d4 100644
--- a/cpp/src/strings/convert/convert_datetime.cu
+++ b/cpp/src/strings/convert/convert_datetime.cu
@@ -653,7 +653,7 @@ std::unique_ptr<cudf::column> to_timestamps(strings_column_view const& input,
                                             rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::to_timestamps(input, timestamp_type, format, cudf::default_stream_value, mr);
+  return detail::to_timestamps(input, timestamp_type, format, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<cudf::column> is_timestamp(strings_column_view const& input,
@@ -661,7 +661,7 @@ std::unique_ptr<cudf::column> is_timestamp(strings_column_view const& input,
                                            rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::is_timestamp(input, format, cudf::default_stream_value, mr);
+  return detail::is_timestamp(input, format, cudf::get_default_stream(), mr);
 }
 
 namespace detail {
@@ -1149,7 +1149,7 @@ std::unique_ptr<column> from_timestamps(column_view const& timestamps,
                                         rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::from_timestamps(timestamps, format, names, cudf::default_stream_value, mr);
+  return detail::from_timestamps(timestamps, format, names, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/convert/convert_durations.cu b/cpp/src/strings/convert/convert_durations.cu
index ac64bceae54..0e2092fd31c 100644
--- a/cpp/src/strings/convert/convert_durations.cu
+++ b/cpp/src/strings/convert/convert_durations.cu
@@ -748,7 +748,7 @@ std::unique_ptr<column> from_durations(column_view const& durations,
                                        rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::from_durations(durations, format, cudf::default_stream_value, mr);
+  return detail::from_durations(durations, format, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> to_durations(strings_column_view const& strings,
@@ -757,7 +757,7 @@ std::unique_ptr<column> to_durations(strings_column_view const& strings,
                                      rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::to_durations(strings, duration_type, format, cudf::default_stream_value, mr);
+  return detail::to_durations(strings, duration_type, format, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/convert/convert_fixed_point.cu b/cpp/src/strings/convert/convert_fixed_point.cu
index 94bb235d1cd..402be192572 100644
--- a/cpp/src/strings/convert/convert_fixed_point.cu
+++ b/cpp/src/strings/convert/convert_fixed_point.cu
@@ -191,7 +191,7 @@ std::unique_ptr<column> to_fixed_point(strings_column_view const& strings,
                                        rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::to_fixed_point(strings, output_type, cudf::default_stream_value, mr);
+  return detail::to_fixed_point(strings, output_type, cudf::get_default_stream(), mr);
 }
 
 namespace detail {
@@ -334,7 +334,7 @@ std::unique_ptr<column> from_fixed_point(column_view const& input,
                                          rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::from_fixed_point(input, cudf::default_stream_value, mr);
+  return detail::from_fixed_point(input, cudf::get_default_stream(), mr);
 }
 
 namespace detail {
@@ -398,7 +398,7 @@ std::unique_ptr<column> is_fixed_point(strings_column_view const& input,
                                        rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::is_fixed_point(input, decimal_type, cudf::default_stream_value, mr);
+  return detail::is_fixed_point(input, decimal_type, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/convert/convert_floats.cu b/cpp/src/strings/convert/convert_floats.cu
index 4c11707f2c6..2de4bd2a2cc 100644
--- a/cpp/src/strings/convert/convert_floats.cu
+++ b/cpp/src/strings/convert/convert_floats.cu
@@ -125,7 +125,7 @@ std::unique_ptr<column> to_floats(strings_column_view const& strings,
                                   rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::to_floats(strings, output_type, cudf::default_stream_value, mr);
+  return detail::to_floats(strings, output_type, cudf::get_default_stream(), mr);
 }
 
 namespace detail {
@@ -450,7 +450,7 @@ std::unique_ptr<column> from_floats(column_view const& floats,
 std::unique_ptr<column> from_floats(column_view const& floats, rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::from_floats(floats, cudf::default_stream_value, mr);
+  return detail::from_floats(floats, cudf::get_default_stream(), mr);
 }
 
 namespace detail {
@@ -489,7 +489,7 @@ std::unique_ptr<column> is_float(strings_column_view const& strings,
                                  rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::is_float(strings, cudf::default_stream_value, mr);
+  return detail::is_float(strings, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/convert/convert_hex.cu b/cpp/src/strings/convert/convert_hex.cu
index c327f7da00e..dbbdffac2c2 100644
--- a/cpp/src/strings/convert/convert_hex.cu
+++ b/cpp/src/strings/convert/convert_hex.cu
@@ -284,21 +284,21 @@ std::unique_ptr<column> hex_to_integers(strings_column_view const& strings,
                                         rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::hex_to_integers(strings, output_type, cudf::default_stream_value, mr);
+  return detail::hex_to_integers(strings, output_type, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> is_hex(strings_column_view const& strings,
                                rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::is_hex(strings, cudf::default_stream_value, mr);
+  return detail::is_hex(strings, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> integers_to_hex(column_view const& input,
                                         rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::integers_to_hex(input, cudf::default_stream_value, mr);
+  return detail::integers_to_hex(input, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/convert/convert_integers.cu b/cpp/src/strings/convert/convert_integers.cu
index abce70ef4d5..343288af0c1 100644
--- a/cpp/src/strings/convert/convert_integers.cu
+++ b/cpp/src/strings/convert/convert_integers.cu
@@ -209,7 +209,7 @@ std::unique_ptr<column> is_integer(strings_column_view const& strings,
                                    rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::is_integer(strings, cudf::default_stream_value, mr);
+  return detail::is_integer(strings, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> is_integer(strings_column_view const& strings,
@@ -217,7 +217,7 @@ std::unique_ptr<column> is_integer(strings_column_view const& strings,
                                    rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::is_integer(strings, int_type, cudf::default_stream_value, mr);
+  return detail::is_integer(strings, int_type, cudf::get_default_stream(), mr);
 }
 
 namespace detail {
@@ -310,7 +310,7 @@ std::unique_ptr<column> to_integers(strings_column_view const& strings,
                                     rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::to_integers(strings, output_type, cudf::default_stream_value, mr);
+  return detail::to_integers(strings, output_type, cudf::get_default_stream(), mr);
 }
 
 namespace detail {
@@ -431,7 +431,7 @@ std::unique_ptr<column> from_integers(column_view const& integers,
                                       rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::from_integers(integers, cudf::default_stream_value, mr);
+  return detail::from_integers(integers, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/convert/convert_ipv4.cu b/cpp/src/strings/convert/convert_ipv4.cu
index 4dbdd3fc9d8..5229f0fdf1b 100644
--- a/cpp/src/strings/convert/convert_ipv4.cu
+++ b/cpp/src/strings/convert/convert_ipv4.cu
@@ -110,7 +110,7 @@ std::unique_ptr<column> ipv4_to_integers(strings_column_view const& strings,
                                          rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::ipv4_to_integers(strings, cudf::default_stream_value, mr);
+  return detail::ipv4_to_integers(strings, cudf::get_default_stream(), mr);
 }
 
 namespace detail {
@@ -264,14 +264,14 @@ std::unique_ptr<column> integers_to_ipv4(column_view const& integers,
                                          rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::integers_to_ipv4(integers, cudf::default_stream_value, mr);
+  return detail::integers_to_ipv4(integers, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> is_ipv4(strings_column_view const& strings,
                                 rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::is_ipv4(strings, cudf::default_stream_value, mr);
+  return detail::is_ipv4(strings, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/convert/convert_lists.cu b/cpp/src/strings/convert/convert_lists.cu
index 547052d5680..289fa9a1c05 100644
--- a/cpp/src/strings/convert/convert_lists.cu
+++ b/cpp/src/strings/convert/convert_lists.cu
@@ -235,7 +235,7 @@ std::unique_ptr<column> format_list_column(lists_column_view const& input,
                                            rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::format_list_column(input, na_rep, separators, cudf::default_stream_value, mr);
+  return detail::format_list_column(input, na_rep, separators, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/convert/convert_urls.cu b/cpp/src/strings/convert/convert_urls.cu
index ca32383e73f..0c6ecf46313 100644
--- a/cpp/src/strings/convert/convert_urls.cu
+++ b/cpp/src/strings/convert/convert_urls.cu
@@ -172,7 +172,7 @@ std::unique_ptr<column> url_encode(strings_column_view const& strings,
                                    rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::url_encode(strings, cudf::default_stream_value, mr);
+  return detail::url_encode(strings, cudf::get_default_stream(), mr);
 }
 
 namespace detail {
@@ -454,7 +454,7 @@ std::unique_ptr<column> url_decode(strings_column_view const& strings,
                                    rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::url_decode(strings, cudf::default_stream_value, mr);
+  return detail::url_decode(strings, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/extract/extract.cu b/cpp/src/strings/extract/extract.cu
index 76d2f84b1a0..882b85d1066 100644
--- a/cpp/src/strings/extract/extract.cu
+++ b/cpp/src/strings/extract/extract.cu
@@ -136,7 +136,7 @@ std::unique_ptr<table> extract(strings_column_view const& strings,
                                rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::extract(strings, pattern, flags, cudf::default_stream_value, mr);
+  return detail::extract(strings, pattern, flags, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/extract/extract_all.cu b/cpp/src/strings/extract/extract_all.cu
index 76c2788c1be..1ba5a8a1470 100644
--- a/cpp/src/strings/extract/extract_all.cu
+++ b/cpp/src/strings/extract/extract_all.cu
@@ -171,7 +171,7 @@ std::unique_ptr<column> extract_all_record(strings_column_view const& strings,
                                            rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::extract_all_record(strings, pattern, flags, cudf::default_stream_value, mr);
+  return detail::extract_all_record(strings, pattern, flags, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/filter_chars.cu b/cpp/src/strings/filter_chars.cu
index 7f0332ba9cc..b30b0e89c28 100644
--- a/cpp/src/strings/filter_chars.cu
+++ b/cpp/src/strings/filter_chars.cu
@@ -160,7 +160,7 @@ std::unique_ptr<column> filter_characters(
 {
   CUDF_FUNC_RANGE();
   return detail::filter_characters(
-    strings, characters_to_filter, keep_characters, replacement, cudf::default_stream_value, mr);
+    strings, characters_to_filter, keep_characters, replacement, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/json/json_path.cu b/cpp/src/strings/json/json_path.cu
index 9ec1ec248e5..303c35ea7fb 100644
--- a/cpp/src/strings/json/json_path.cu
+++ b/cpp/src/strings/json/json_path.cu
@@ -1047,7 +1047,7 @@ std::unique_ptr<cudf::column> get_json_object(cudf::strings_column_view const& c
                                               rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::get_json_object(col, json_path, options, cudf::default_stream_value, mr);
+  return detail::get_json_object(col, json_path, options, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/like.cu b/cpp/src/strings/like.cu
index 2d3a3d3d52a..cb6fc844426 100644
--- a/cpp/src/strings/like.cu
+++ b/cpp/src/strings/like.cu
@@ -147,7 +147,7 @@ std::unique_ptr<column> like(strings_column_view const& input,
                              rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::like(input, pattern, escape_character, cudf::default_stream_value, mr);
+  return detail::like(input, pattern, escape_character, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/padding.cu b/cpp/src/strings/padding.cu
index e4002525af9..d84b4afc7cf 100644
--- a/cpp/src/strings/padding.cu
+++ b/cpp/src/strings/padding.cu
@@ -58,7 +58,7 @@ std::unique_ptr<column> pad(
   size_type width,
   side_type side                      = side_type::RIGHT,
   std::string_view fill_char          = " ",
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   size_type strings_count = strings.size();
@@ -180,7 +180,7 @@ std::unique_ptr<column> pad(strings_column_view const& input,
                             rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::pad(input, width, side, fill_char, cudf::default_stream_value, mr);
+  return detail::pad(input, width, side, fill_char, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> zfill(strings_column_view const& input,
@@ -188,7 +188,7 @@ std::unique_ptr<column> zfill(strings_column_view const& input,
                               rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::zfill(input, width, cudf::default_stream_value, mr);
+  return detail::zfill(input, width, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/repeat_strings.cu b/cpp/src/strings/repeat_strings.cu
index 959229bbb87..5d02069d7f3 100644
--- a/cpp/src/strings/repeat_strings.cu
+++ b/cpp/src/strings/repeat_strings.cu
@@ -385,7 +385,7 @@ std::unique_ptr<string_scalar> repeat_string(string_scalar const& input,
                                              rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::repeat_string(input, repeat_times, cudf::default_stream_value, mr);
+  return detail::repeat_string(input, repeat_times, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> repeat_strings(strings_column_view const& input,
@@ -393,7 +393,7 @@ std::unique_ptr<column> repeat_strings(strings_column_view const& input,
                                        rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::repeat_strings(input, repeat_times, cudf::default_stream_value, mr);
+  return detail::repeat_strings(input, repeat_times, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> repeat_strings(strings_column_view const& input,
@@ -403,7 +403,7 @@ std::unique_ptr<column> repeat_strings(strings_column_view const& input,
 {
   CUDF_FUNC_RANGE();
   return detail::repeat_strings(
-    input, repeat_times, output_strings_sizes, cudf::default_stream_value, mr);
+    input, repeat_times, output_strings_sizes, cudf::get_default_stream(), mr);
 }
 
 std::pair<std::unique_ptr<column>, int64_t> repeat_strings_output_sizes(
@@ -412,7 +412,7 @@ std::pair<std::unique_ptr<column>, int64_t> repeat_strings_output_sizes(
   rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::repeat_strings_output_sizes(input, repeat_times, cudf::default_stream_value, mr);
+  return detail::repeat_strings_output_sizes(input, repeat_times, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/replace/backref_re.cu b/cpp/src/strings/replace/backref_re.cu
index e0a995c26b9..9658610da18 100644
--- a/cpp/src/strings/replace/backref_re.cu
+++ b/cpp/src/strings/replace/backref_re.cu
@@ -153,7 +153,7 @@ std::unique_ptr<column> replace_with_backrefs(strings_column_view const& strings
 {
   CUDF_FUNC_RANGE();
   return detail::replace_with_backrefs(
-    strings, pattern, replacement, flags, cudf::default_stream_value, mr);
+    strings, pattern, replacement, flags, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/replace/multi_re.cu b/cpp/src/strings/replace/multi_re.cu
index a5b9ad37e65..cc5cf1384ec 100644
--- a/cpp/src/strings/replace/multi_re.cu
+++ b/cpp/src/strings/replace/multi_re.cu
@@ -198,7 +198,7 @@ std::unique_ptr<column> replace_re(strings_column_view const& strings,
                                    rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::replace_re(strings, patterns, replacements, flags, cudf::default_stream_value, mr);
+  return detail::replace_re(strings, patterns, replacements, flags, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/replace/replace.cu b/cpp/src/strings/replace/replace.cu
index de875014054..1cb7de5dc3b 100644
--- a/cpp/src/strings/replace/replace.cu
+++ b/cpp/src/strings/replace/replace.cu
@@ -843,7 +843,7 @@ std::unique_ptr<column> replace(strings_column_view const& strings,
                                 rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::replace(strings, target, repl, maxrepl, cudf::default_stream_value, mr);
+  return detail::replace(strings, target, repl, maxrepl, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> replace_slice(strings_column_view const& strings,
@@ -853,7 +853,7 @@ std::unique_ptr<column> replace_slice(strings_column_view const& strings,
                                       rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::replace_slice(strings, repl, start, stop, cudf::default_stream_value, mr);
+  return detail::replace_slice(strings, repl, start, stop, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> replace(strings_column_view const& strings,
@@ -862,7 +862,7 @@ std::unique_ptr<column> replace(strings_column_view const& strings,
                                 rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::replace(strings, targets, repls, cudf::default_stream_value, mr);
+  return detail::replace(strings, targets, repls, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/replace/replace_re.cu b/cpp/src/strings/replace/replace_re.cu
index fd0049d7c89..34175f2ec6c 100644
--- a/cpp/src/strings/replace/replace_re.cu
+++ b/cpp/src/strings/replace/replace_re.cu
@@ -106,7 +106,7 @@ std::unique_ptr<column> replace_re(
   string_scalar const& replacement,
   std::optional<size_type> max_replace_count,
   regex_flags const flags,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   if (input.is_empty()) return make_empty_column(type_id::STRING);
@@ -144,7 +144,7 @@ std::unique_ptr<column> replace_re(strings_column_view const& strings,
 {
   CUDF_FUNC_RANGE();
   return detail::replace_re(
-    strings, pattern, replacement, max_replace_count, flags, cudf::default_stream_value, mr);
+    strings, pattern, replacement, max_replace_count, flags, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/search/find.cu b/cpp/src/strings/search/find.cu
index fa8581558a0..c48aedc5499 100644
--- a/cpp/src/strings/search/find.cu
+++ b/cpp/src/strings/search/find.cu
@@ -107,7 +107,7 @@ std::unique_ptr<column> find(
   string_scalar const& target,
   size_type start                     = 0,
   size_type stop                      = -1,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   auto pfn = [] __device__(
@@ -127,7 +127,7 @@ std::unique_ptr<column> rfind(
   string_scalar const& target,
   size_type start                     = 0,
   size_type stop                      = -1,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   auto pfn = [] __device__(
@@ -153,7 +153,7 @@ std::unique_ptr<column> find(strings_column_view const& strings,
                              rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::find(strings, target, start, stop, cudf::default_stream_value, mr);
+  return detail::find(strings, target, start, stop, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> rfind(strings_column_view const& strings,
@@ -163,7 +163,7 @@ std::unique_ptr<column> rfind(strings_column_view const& strings,
                               rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::rfind(strings, target, start, stop, cudf::default_stream_value, mr);
+  return detail::rfind(strings, target, start, stop, cudf::get_default_stream(), mr);
 }
 
 namespace detail {
@@ -463,7 +463,7 @@ std::unique_ptr<column> contains(strings_column_view const& strings,
                                  rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::contains(strings, target, cudf::default_stream_value, mr);
+  return detail::contains(strings, target, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> contains(strings_column_view const& strings,
@@ -471,7 +471,7 @@ std::unique_ptr<column> contains(strings_column_view const& strings,
                                  rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::contains(strings, targets, cudf::default_stream_value, mr);
+  return detail::contains(strings, targets, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> starts_with(strings_column_view const& strings,
@@ -479,7 +479,7 @@ std::unique_ptr<column> starts_with(strings_column_view const& strings,
                                     rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::starts_with(strings, target, cudf::default_stream_value, mr);
+  return detail::starts_with(strings, target, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> starts_with(strings_column_view const& strings,
@@ -487,7 +487,7 @@ std::unique_ptr<column> starts_with(strings_column_view const& strings,
                                     rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::starts_with(strings, targets, cudf::default_stream_value, mr);
+  return detail::starts_with(strings, targets, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> ends_with(strings_column_view const& strings,
@@ -495,7 +495,7 @@ std::unique_ptr<column> ends_with(strings_column_view const& strings,
                                   rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::ends_with(strings, target, cudf::default_stream_value, mr);
+  return detail::ends_with(strings, target, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> ends_with(strings_column_view const& strings,
@@ -503,7 +503,7 @@ std::unique_ptr<column> ends_with(strings_column_view const& strings,
                                   rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::ends_with(strings, targets, cudf::default_stream_value, mr);
+  return detail::ends_with(strings, targets, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/search/find_multiple.cu b/cpp/src/strings/search/find_multiple.cu
index 1e0f26b8650..389e6eccc43 100644
--- a/cpp/src/strings/search/find_multiple.cu
+++ b/cpp/src/strings/search/find_multiple.cu
@@ -92,7 +92,7 @@ std::unique_ptr<column> find_multiple(strings_column_view const& input,
                                       rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::find_multiple(input, targets, cudf::default_stream_value, mr);
+  return detail::find_multiple(input, targets, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/search/findall.cu b/cpp/src/strings/search/findall.cu
index 73470bde867..07829581aa6 100644
--- a/cpp/src/strings/search/findall.cu
+++ b/cpp/src/strings/search/findall.cu
@@ -139,7 +139,7 @@ std::unique_ptr<column> findall(strings_column_view const& input,
                                 rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::findall(input, pattern, flags, cudf::default_stream_value, mr);
+  return detail::findall(input, pattern, flags, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/split/partition.cu b/cpp/src/strings/split/partition.cu
index 161c48383ff..acdd9efbb45 100644
--- a/cpp/src/strings/split/partition.cu
+++ b/cpp/src/strings/split/partition.cu
@@ -184,7 +184,7 @@ struct rpartition_fn : public partition_fn {
 std::unique_ptr<table> partition(
   strings_column_view const& strings,
   string_scalar const& delimiter      = string_scalar(""),
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   CUDF_EXPECTS(delimiter.is_valid(stream), "Parameter delimiter must be valid");
@@ -212,7 +212,7 @@ std::unique_ptr<table> partition(
 std::unique_ptr<table> rpartition(
   strings_column_view const& strings,
   string_scalar const& delimiter      = string_scalar(""),
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   CUDF_EXPECTS(delimiter.is_valid(stream), "Parameter delimiter must be valid");
@@ -246,7 +246,7 @@ std::unique_ptr<table> partition(strings_column_view const& strings,
                                  rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::partition(strings, delimiter, cudf::default_stream_value, mr);
+  return detail::partition(strings, delimiter, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<table> rpartition(strings_column_view const& strings,
@@ -254,7 +254,7 @@ std::unique_ptr<table> rpartition(strings_column_view const& strings,
                                   rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::rpartition(strings, delimiter, cudf::default_stream_value, mr);
+  return detail::rpartition(strings, delimiter, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/split/split.cu b/cpp/src/strings/split/split.cu
index 000029063e0..89b4c1d75c2 100644
--- a/cpp/src/strings/split/split.cu
+++ b/cpp/src/strings/split/split.cu
@@ -795,7 +795,7 @@ std::unique_ptr<table> split(
   strings_column_view const& strings_column,
   string_scalar const& delimiter      = string_scalar(""),
   size_type maxsplit                  = -1,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   CUDF_EXPECTS(delimiter.is_valid(stream), "Parameter delimiter must be valid");
@@ -820,7 +820,7 @@ std::unique_ptr<table> rsplit(
   strings_column_view const& strings_column,
   string_scalar const& delimiter      = string_scalar(""),
   size_type maxsplit                  = -1,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   CUDF_EXPECTS(delimiter.is_valid(stream), "Parameter delimiter must be valid");
@@ -851,7 +851,7 @@ std::unique_ptr<table> split(strings_column_view const& strings_column,
                              rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::split(strings_column, delimiter, maxsplit, cudf::default_stream_value, mr);
+  return detail::split(strings_column, delimiter, maxsplit, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<table> rsplit(strings_column_view const& strings_column,
@@ -860,7 +860,7 @@ std::unique_ptr<table> rsplit(strings_column_view const& strings_column,
                               rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::rsplit(strings_column, delimiter, maxsplit, cudf::default_stream_value, mr);
+  return detail::rsplit(strings_column, delimiter, maxsplit, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/split/split_re.cu b/cpp/src/strings/split/split_re.cu
index e8de1da0d83..2538bab6229 100644
--- a/cpp/src/strings/split/split_re.cu
+++ b/cpp/src/strings/split/split_re.cu
@@ -334,7 +334,7 @@ std::unique_ptr<table> split_re(strings_column_view const& input,
                                 rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::split_re(input, pattern, maxsplit, cudf::default_stream_value, mr);
+  return detail::split_re(input, pattern, maxsplit, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> split_record_re(strings_column_view const& input,
@@ -343,7 +343,7 @@ std::unique_ptr<column> split_record_re(strings_column_view const& input,
                                         rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::split_record_re(input, pattern, maxsplit, cudf::default_stream_value, mr);
+  return detail::split_record_re(input, pattern, maxsplit, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<table> rsplit_re(strings_column_view const& input,
@@ -352,7 +352,7 @@ std::unique_ptr<table> rsplit_re(strings_column_view const& input,
                                  rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::rsplit_re(input, pattern, maxsplit, cudf::default_stream_value, mr);
+  return detail::rsplit_re(input, pattern, maxsplit, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> rsplit_record_re(strings_column_view const& input,
@@ -361,7 +361,7 @@ std::unique_ptr<column> rsplit_record_re(strings_column_view const& input,
                                          rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::rsplit_record_re(input, pattern, maxsplit, cudf::default_stream_value, mr);
+  return detail::rsplit_record_re(input, pattern, maxsplit, cudf::get_default_stream(), mr);
 }
 }  // namespace strings
 }  // namespace cudf
diff --git a/cpp/src/strings/split/split_record.cu b/cpp/src/strings/split/split_record.cu
index 60c09ffd93a..83d8d7f9203 100644
--- a/cpp/src/strings/split/split_record.cu
+++ b/cpp/src/strings/split/split_record.cu
@@ -268,7 +268,7 @@ std::unique_ptr<column> split_record(
   strings_column_view const& strings,
   string_scalar const& delimiter      = string_scalar(""),
   size_type maxsplit                  = -1,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   CUDF_EXPECTS(delimiter.is_valid(stream), "Parameter delimiter must be valid");
@@ -304,7 +304,7 @@ std::unique_ptr<column> split_record(strings_column_view const& strings,
 {
   CUDF_FUNC_RANGE();
   return detail::split_record<detail::Dir::FORWARD>(
-    strings, delimiter, maxsplit, cudf::default_stream_value, mr);
+    strings, delimiter, maxsplit, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> rsplit_record(strings_column_view const& strings,
@@ -314,7 +314,7 @@ std::unique_ptr<column> rsplit_record(strings_column_view const& strings,
 {
   CUDF_FUNC_RANGE();
   return detail::split_record<detail::Dir::BACKWARD>(
-    strings, delimiter, maxsplit, cudf::default_stream_value, mr);
+    strings, delimiter, maxsplit, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/strings_column_factories.cu b/cpp/src/strings/strings_column_factories.cu
index f5188ce1354..8c5916283be 100644
--- a/cpp/src/strings/strings_column_factories.cu
+++ b/cpp/src/strings/strings_column_factories.cu
@@ -61,7 +61,7 @@ std::unique_ptr<column> make_strings_column(
   device_span<size_type> offsets,
   size_type null_count,
   rmm::device_buffer&& null_mask,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   CUDF_FUNC_RANGE();
diff --git a/cpp/src/strings/strip.cu b/cpp/src/strings/strip.cu
index 5d51a5a7bed..baa6a27b4ba 100644
--- a/cpp/src/strings/strip.cu
+++ b/cpp/src/strings/strip.cu
@@ -73,7 +73,7 @@ std::unique_ptr<column> strip(
   strings_column_view const& input,
   side_type side                      = side_type::BOTH,
   string_scalar const& to_strip       = string_scalar(""),
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   if (input.is_empty()) return make_empty_column(type_id::STRING);
@@ -104,7 +104,7 @@ std::unique_ptr<column> strip(strings_column_view const& input,
                               rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::strip(input, side, to_strip, cudf::default_stream_value, mr);
+  return detail::strip(input, side, to_strip, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/substring.cu b/cpp/src/strings/substring.cu
index 271a5375915..e0d1bc8cf31 100644
--- a/cpp/src/strings/substring.cu
+++ b/cpp/src/strings/substring.cu
@@ -110,7 +110,7 @@ std::unique_ptr<column> slice_strings(
   numeric_scalar<size_type> const& start = numeric_scalar<size_type>(0, false),
   numeric_scalar<size_type> const& stop  = numeric_scalar<size_type>(0, false),
   numeric_scalar<size_type> const& step  = numeric_scalar<size_type>(1),
-  rmm::cuda_stream_view stream           = cudf::default_stream_value,
+  rmm::cuda_stream_view stream           = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr    = rmm::mr::get_current_device_resource())
 {
   if (strings.is_empty()) return make_empty_column(type_id::STRING);
@@ -143,7 +143,7 @@ std::unique_ptr<column> slice_strings(strings_column_view const& strings,
                                       rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::slice_strings(strings, start, stop, step, cudf::default_stream_value, mr);
+  return detail::slice_strings(strings, start, stop, step, cudf::get_default_stream(), mr);
 }
 
 namespace detail {
@@ -398,7 +398,7 @@ std::unique_ptr<column> slice_strings(strings_column_view const& strings,
 {
   CUDF_FUNC_RANGE();
   return detail::slice_strings(
-    strings, starts_column, stops_column, cudf::default_stream_value, mr);
+    strings, starts_column, stops_column, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> slice_strings(strings_column_view const& strings,
@@ -410,7 +410,7 @@ std::unique_ptr<column> slice_strings(strings_column_view const& strings,
   return detail::slice_strings(strings,
                                cudf::detail::make_pair_iterator<string_view>(delimiter),
                                count,
-                               cudf::default_stream_value,
+                               cudf::get_default_stream(),
                                mr);
 }
 
@@ -420,7 +420,7 @@ std::unique_ptr<column> slice_strings(strings_column_view const& strings,
                                       rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::slice_strings(strings, delimiters, count, cudf::default_stream_value, mr);
+  return detail::slice_strings(strings, delimiters, count, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/translate.cu b/cpp/src/strings/translate.cu
index 94e4d313109..01ecc49f10a 100644
--- a/cpp/src/strings/translate.cu
+++ b/cpp/src/strings/translate.cu
@@ -130,7 +130,7 @@ std::unique_ptr<column> translate(strings_column_view const& strings,
                                   rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::translate(strings, chars_table, cudf::default_stream_value, mr);
+  return detail::translate(strings, chars_table, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/wrap.cu b/cpp/src/strings/wrap.cu
index 232e61c1965..cd0aafc3545 100644
--- a/cpp/src/strings/wrap.cu
+++ b/cpp/src/strings/wrap.cu
@@ -137,7 +137,7 @@ std::unique_ptr<column> wrap(strings_column_view const& strings,
                              rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::wrap<detail::execute_wrap>(strings, width, cudf::default_stream_value, mr);
+  return detail::wrap<detail::execute_wrap>(strings, width, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/text/detokenize.cu b/cpp/src/text/detokenize.cu
index 16cc8f4922d..5e86a7ca1f3 100644
--- a/cpp/src/text/detokenize.cu
+++ b/cpp/src/text/detokenize.cu
@@ -201,7 +201,7 @@ std::unique_ptr<cudf::column> detokenize(cudf::strings_column_view const& string
                                          rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::detokenize(strings, row_indices, separator, cudf::default_stream_value, mr);
+  return detail::detokenize(strings, row_indices, separator, cudf::get_default_stream(), mr);
 }
 
 }  // namespace nvtext
diff --git a/cpp/src/text/edit_distance.cu b/cpp/src/text/edit_distance.cu
index 18658433d6c..fb0ecdb7677 100644
--- a/cpp/src/text/edit_distance.cu
+++ b/cpp/src/text/edit_distance.cu
@@ -309,7 +309,7 @@ std::unique_ptr<cudf::column> edit_distance(cudf::strings_column_view const& str
                                             rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::edit_distance(strings, targets, cudf::default_stream_value, mr);
+  return detail::edit_distance(strings, targets, cudf::get_default_stream(), mr);
 }
 
 /**
@@ -319,7 +319,7 @@ std::unique_ptr<cudf::column> edit_distance_matrix(cudf::strings_column_view con
                                                    rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::edit_distance_matrix(strings, cudf::default_stream_value, mr);
+  return detail::edit_distance_matrix(strings, cudf::get_default_stream(), mr);
 }
 
 }  // namespace nvtext
diff --git a/cpp/src/text/generate_ngrams.cu b/cpp/src/text/generate_ngrams.cu
index 7ffd2bd80a7..d5ff7b99344 100644
--- a/cpp/src/text/generate_ngrams.cu
+++ b/cpp/src/text/generate_ngrams.cu
@@ -88,7 +88,7 @@ std::unique_ptr<cudf::column> generate_ngrams(
   cudf::strings_column_view const& strings,
   cudf::size_type ngrams               = 2,
   cudf::string_scalar const& separator = cudf::string_scalar{"_"},
-  rmm::cuda_stream_view stream         = cudf::default_stream_value,
+  rmm::cuda_stream_view stream         = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr  = rmm::mr::get_current_device_resource())
 {
   CUDF_EXPECTS(separator.is_valid(stream), "Parameter separator must be valid");
@@ -151,7 +151,7 @@ std::unique_ptr<cudf::column> generate_ngrams(cudf::strings_column_view const& s
                                               rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::generate_ngrams(strings, ngrams, separator, cudf::default_stream_value, mr);
+  return detail::generate_ngrams(strings, ngrams, separator, cudf::get_default_stream(), mr);
 }
 
 namespace detail {
@@ -261,7 +261,7 @@ std::unique_ptr<cudf::column> generate_character_ngrams(cudf::strings_column_vie
                                                         rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::generate_character_ngrams(strings, ngrams, cudf::default_stream_value, mr);
+  return detail::generate_character_ngrams(strings, ngrams, cudf::get_default_stream(), mr);
 }
 
 }  // namespace nvtext
diff --git a/cpp/src/text/ngrams_tokenize.cu b/cpp/src/text/ngrams_tokenize.cu
index f353b79f720..b0071ed9e88 100644
--- a/cpp/src/text/ngrams_tokenize.cu
+++ b/cpp/src/text/ngrams_tokenize.cu
@@ -139,7 +139,7 @@ std::unique_ptr<cudf::column> ngrams_tokenize(
   cudf::size_type ngrams               = 2,
   cudf::string_scalar const& delimiter = cudf::string_scalar(""),
   cudf::string_scalar const& separator = cudf::string_scalar{"_"},
-  rmm::cuda_stream_view stream         = cudf::default_stream_value,
+  rmm::cuda_stream_view stream         = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr  = rmm::mr::get_current_device_resource())
 {
   CUDF_EXPECTS(delimiter.is_valid(stream), "Parameter delimiter must be valid");
@@ -263,7 +263,7 @@ std::unique_ptr<cudf::column> ngrams_tokenize(cudf::strings_column_view const& s
 {
   CUDF_FUNC_RANGE();
   return detail::ngrams_tokenize(
-    strings, ngrams, delimiter, separator, cudf::default_stream_value, mr);
+    strings, ngrams, delimiter, separator, cudf::get_default_stream(), mr);
 }
 
 }  // namespace nvtext
diff --git a/cpp/src/text/normalize.cu b/cpp/src/text/normalize.cu
index 48921ac6520..2d5dd0ebbf8 100644
--- a/cpp/src/text/normalize.cu
+++ b/cpp/src/text/normalize.cu
@@ -244,7 +244,7 @@ std::unique_ptr<cudf::column> normalize_spaces(cudf::strings_column_view const&
                                                rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::normalize_spaces(strings, cudf::default_stream_value, mr);
+  return detail::normalize_spaces(strings, cudf::get_default_stream(), mr);
 }
 
 /**
@@ -255,7 +255,7 @@ std::unique_ptr<cudf::column> normalize_characters(cudf::strings_column_view con
                                                    rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::normalize_characters(strings, do_lower_case, cudf::default_stream_value, mr);
+  return detail::normalize_characters(strings, do_lower_case, cudf::get_default_stream(), mr);
 }
 
 }  // namespace nvtext
diff --git a/cpp/src/text/replace.cu b/cpp/src/text/replace.cu
index 9171df97800..87c1d345ff5 100644
--- a/cpp/src/text/replace.cu
+++ b/cpp/src/text/replace.cu
@@ -282,7 +282,7 @@ std::unique_ptr<cudf::column> replace_tokens(cudf::strings_column_view const& st
 {
   CUDF_FUNC_RANGE();
   return detail::replace_tokens(
-    strings, targets, replacements, delimiter, cudf::default_stream_value, mr);
+    strings, targets, replacements, delimiter, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<cudf::column> filter_tokens(cudf::strings_column_view const& strings,
@@ -293,7 +293,7 @@ std::unique_ptr<cudf::column> filter_tokens(cudf::strings_column_view const& str
 {
   CUDF_FUNC_RANGE();
   return detail::filter_tokens(
-    strings, min_token_length, replacement, delimiter, cudf::default_stream_value, mr);
+    strings, min_token_length, replacement, delimiter, cudf::get_default_stream(), mr);
 }
 
 }  // namespace nvtext
diff --git a/cpp/src/text/stemmer.cu b/cpp/src/text/stemmer.cu
index cdf87967a0d..780ca5b4e5d 100644
--- a/cpp/src/text/stemmer.cu
+++ b/cpp/src/text/stemmer.cu
@@ -254,7 +254,7 @@ std::unique_ptr<cudf::column> is_letter(cudf::strings_column_view const& strings
   return detail::is_letter(strings,
                            ltype,
                            thrust::make_constant_iterator<cudf::size_type>(character_index),
-                           cudf::default_stream_value,
+                           cudf::get_default_stream(),
                            mr);
 }
 
@@ -264,7 +264,7 @@ std::unique_ptr<cudf::column> is_letter(cudf::strings_column_view const& strings
                                         rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::is_letter(strings, ltype, indices, cudf::default_stream_value, mr);
+  return detail::is_letter(strings, ltype, indices, cudf::get_default_stream(), mr);
 }
 
 /**
@@ -274,7 +274,7 @@ std::unique_ptr<cudf::column> porter_stemmer_measure(cudf::strings_column_view c
                                                      rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::porter_stemmer_measure(strings, cudf::default_stream_value, mr);
+  return detail::porter_stemmer_measure(strings, cudf::get_default_stream(), mr);
 }
 
 }  // namespace nvtext
diff --git a/cpp/src/text/subword/bpe_tokenizer.cu b/cpp/src/text/subword/bpe_tokenizer.cu
index 549704bcbe4..ba07d70fea3 100644
--- a/cpp/src/text/subword/bpe_tokenizer.cu
+++ b/cpp/src/text/subword/bpe_tokenizer.cu
@@ -565,7 +565,7 @@ std::unique_ptr<cudf::column> byte_pair_encoding(cudf::strings_column_view const
                                                  rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::byte_pair_encoding(input, merges_table, separator, cudf::default_stream_value, mr);
+  return detail::byte_pair_encoding(input, merges_table, separator, cudf::get_default_stream(), mr);
 }
 
 }  // namespace nvtext
diff --git a/cpp/src/text/subword/load_hash_file.cu b/cpp/src/text/subword/load_hash_file.cu
index 1b84cfd49fa..b52597fff47 100644
--- a/cpp/src/text/subword/load_hash_file.cu
+++ b/cpp/src/text/subword/load_hash_file.cu
@@ -284,7 +284,7 @@ std::unique_ptr<hashed_vocabulary> load_vocabulary_file(
   std::string const& filename_hashed_vocabulary, rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::load_vocabulary_file(filename_hashed_vocabulary, cudf::default_stream_value, mr);
+  return detail::load_vocabulary_file(filename_hashed_vocabulary, cudf::get_default_stream(), mr);
 }
 
 }  // namespace nvtext
diff --git a/cpp/src/text/subword/load_merges_file.cu b/cpp/src/text/subword/load_merges_file.cu
index da0598ddfac..518a860e39a 100644
--- a/cpp/src/text/subword/load_merges_file.cu
+++ b/cpp/src/text/subword/load_merges_file.cu
@@ -159,7 +159,7 @@ std::unique_ptr<bpe_merge_pairs> load_merge_pairs_file(std::string const& filena
                                                        rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::load_merge_pairs_file(filename_merges, cudf::default_stream_value, mr);
+  return detail::load_merge_pairs_file(filename_merges, cudf::get_default_stream(), mr);
 }
 
 bpe_merge_pairs::bpe_merge_pairs_impl::bpe_merge_pairs_impl(
diff --git a/cpp/src/text/subword/subword_tokenize.cu b/cpp/src/text/subword/subword_tokenize.cu
index 7bd941f5823..844f2a625e0 100644
--- a/cpp/src/text/subword/subword_tokenize.cu
+++ b/cpp/src/text/subword/subword_tokenize.cu
@@ -270,7 +270,7 @@ tokenizer_result subword_tokenize(cudf::strings_column_view const& strings,
                                   do_lower_case,
                                   do_truncate,
                                   max_rows_tensor,
-                                  cudf::default_stream_value,
+                                  cudf::get_default_stream(),
                                   mr);
 }
 
diff --git a/cpp/src/text/tokenize.cu b/cpp/src/text/tokenize.cu
index 9da28af13c2..4ffd1b08998 100644
--- a/cpp/src/text/tokenize.cu
+++ b/cpp/src/text/tokenize.cu
@@ -232,7 +232,7 @@ std::unique_ptr<cudf::column> tokenize(cudf::strings_column_view const& strings,
                                        rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::tokenize(strings, delimiter, cudf::default_stream_value, mr);
+  return detail::tokenize(strings, delimiter, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<cudf::column> tokenize(cudf::strings_column_view const& strings,
@@ -240,7 +240,7 @@ std::unique_ptr<cudf::column> tokenize(cudf::strings_column_view const& strings,
                                        rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::tokenize(strings, delimiters, cudf::default_stream_value, mr);
+  return detail::tokenize(strings, delimiters, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<cudf::column> count_tokens(cudf::strings_column_view const& strings,
@@ -248,7 +248,7 @@ std::unique_ptr<cudf::column> count_tokens(cudf::strings_column_view const& stri
                                            rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::count_tokens(strings, delimiter, cudf::default_stream_value, mr);
+  return detail::count_tokens(strings, delimiter, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<cudf::column> count_tokens(cudf::strings_column_view const& strings,
@@ -256,14 +256,14 @@ std::unique_ptr<cudf::column> count_tokens(cudf::strings_column_view const& stri
                                            rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::count_tokens(strings, delimiters, cudf::default_stream_value, mr);
+  return detail::count_tokens(strings, delimiters, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<cudf::column> character_tokenize(cudf::strings_column_view const& strings,
                                                  rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::character_tokenize(strings, cudf::default_stream_value, mr);
+  return detail::character_tokenize(strings, cudf::get_default_stream(), mr);
 }
 
 }  // namespace nvtext
diff --git a/cpp/src/transform/bools_to_mask.cu b/cpp/src/transform/bools_to_mask.cu
index 70ead43e15b..e558b51fbb0 100644
--- a/cpp/src/transform/bools_to_mask.cu
+++ b/cpp/src/transform/bools_to_mask.cu
@@ -61,7 +61,7 @@ std::pair<std::unique_ptr<rmm::device_buffer>, cudf::size_type> bools_to_mask(
   column_view const& input, rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::bools_to_mask(input, cudf::default_stream_value, mr);
+  return detail::bools_to_mask(input, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/transform/compute_column.cu b/cpp/src/transform/compute_column.cu
index 9d9f1d3d8d2..e11ff437c14 100644
--- a/cpp/src/transform/compute_column.cu
+++ b/cpp/src/transform/compute_column.cu
@@ -137,7 +137,7 @@ std::unique_ptr<column> compute_column(table_view const& table,
                                        rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::compute_column(table, expr, cudf::default_stream_value, mr);
+  return detail::compute_column(table, expr, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/transform/encode.cu b/cpp/src/transform/encode.cu
index 75e3fae6e78..c0e0c83c416 100644
--- a/cpp/src/transform/encode.cu
+++ b/cpp/src/transform/encode.cu
@@ -73,7 +73,7 @@ std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::column>> encode(
   cudf::table_view const& input, rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::encode(input, cudf::default_stream_value, mr);
+  return detail::encode(input, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/transform/mask_to_bools.cu b/cpp/src/transform/mask_to_bools.cu
index 23bfe1f24f1..1b9a58c4724 100644
--- a/cpp/src/transform/mask_to_bools.cu
+++ b/cpp/src/transform/mask_to_bools.cu
@@ -64,6 +64,6 @@ std::unique_ptr<column> mask_to_bools(bitmask_type const* bitmask,
                                       rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::mask_to_bools(bitmask, begin_bit, end_bit, cudf::default_stream_value, mr);
+  return detail::mask_to_bools(bitmask, begin_bit, end_bit, cudf::get_default_stream(), mr);
 }
 }  // namespace cudf
diff --git a/cpp/src/transform/nans_to_nulls.cu b/cpp/src/transform/nans_to_nulls.cu
index d840832af88..3c02409f778 100644
--- a/cpp/src/transform/nans_to_nulls.cu
+++ b/cpp/src/transform/nans_to_nulls.cu
@@ -95,7 +95,7 @@ std::pair<std::unique_ptr<rmm::device_buffer>, cudf::size_type> nans_to_nulls(
   column_view const& input, rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::nans_to_nulls(input, cudf::default_stream_value, mr);
+  return detail::nans_to_nulls(input, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/transform/one_hot_encode.cu b/cpp/src/transform/one_hot_encode.cu
index 94cf86f6829..9ccd21f5898 100644
--- a/cpp/src/transform/one_hot_encode.cu
+++ b/cpp/src/transform/one_hot_encode.cu
@@ -127,6 +127,6 @@ std::pair<std::unique_ptr<column>, table_view> one_hot_encode(column_view const&
                                                               rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::one_hot_encode(input, categories, cudf::default_stream_value, mr);
+  return detail::one_hot_encode(input, categories, cudf::get_default_stream(), mr);
 }
 }  // namespace cudf
diff --git a/cpp/src/transform/row_bit_count.cu b/cpp/src/transform/row_bit_count.cu
index 9545b5289f9..634fdd70831 100644
--- a/cpp/src/transform/row_bit_count.cu
+++ b/cpp/src/transform/row_bit_count.cu
@@ -539,7 +539,7 @@ std::unique_ptr<column> row_bit_count(table_view const& t,
 std::unique_ptr<column> row_bit_count(table_view const& t, rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::row_bit_count(t, cudf::default_stream_value, mr);
+  return detail::row_bit_count(t, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/transform/transform.cpp b/cpp/src/transform/transform.cpp
index 9de17f22b50..5e6646aa48f 100644
--- a/cpp/src/transform/transform.cpp
+++ b/cpp/src/transform/transform.cpp
@@ -99,7 +99,7 @@ std::unique_ptr<column> transform(column_view const& input,
                                   rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::transform(input, unary_udf, output_type, is_ptx, cudf::default_stream_value, mr);
+  return detail::transform(input, unary_udf, output_type, is_ptx, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/transpose/transpose.cu b/cpp/src/transpose/transpose.cu
index 5592e298fa3..94ede5d3c65 100644
--- a/cpp/src/transpose/transpose.cu
+++ b/cpp/src/transpose/transpose.cu
@@ -63,7 +63,7 @@ std::pair<std::unique_ptr<column>, table_view> transpose(table_view const& input
                                                          rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::transpose(input, cudf::default_stream_value, mr);
+  return detail::transpose(input, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/unary/cast_ops.cu b/cpp/src/unary/cast_ops.cu
index 17c47d8fc90..b569ce04c31 100644
--- a/cpp/src/unary/cast_ops.cu
+++ b/cpp/src/unary/cast_ops.cu
@@ -412,7 +412,7 @@ std::unique_ptr<column> cast(column_view const& input,
                              rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::cast(input, type, cudf::default_stream_value, mr);
+  return detail::cast(input, type, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/unary/math_ops.cu b/cpp/src/unary/math_ops.cu
index 448ac01babb..961f3a9e720 100644
--- a/cpp/src/unary/math_ops.cu
+++ b/cpp/src/unary/math_ops.cu
@@ -641,7 +641,7 @@ std::unique_ptr<cudf::column> unary_operation(cudf::column_view const& input,
                                               rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::unary_operation(input, op, cudf::default_stream_value, mr);
+  return detail::unary_operation(input, op, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/unary/nan_ops.cu b/cpp/src/unary/nan_ops.cu
index 5cac9c51e4e..2cf83466b03 100644
--- a/cpp/src/unary/nan_ops.cu
+++ b/cpp/src/unary/nan_ops.cu
@@ -94,14 +94,14 @@ std::unique_ptr<column> is_not_nan(cudf::column_view const& input,
 std::unique_ptr<column> is_nan(cudf::column_view const& input, rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::is_nan(input, cudf::default_stream_value, mr);
+  return detail::is_nan(input, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> is_not_nan(cudf::column_view const& input,
                                    rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::is_not_nan(input, cudf::default_stream_value, mr);
+  return detail::is_not_nan(input, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/unary/null_ops.cu b/cpp/src/unary/null_ops.cu
index 04bb1fe63e3..e64c68fdae6 100644
--- a/cpp/src/unary/null_ops.cu
+++ b/cpp/src/unary/null_ops.cu
@@ -58,14 +58,14 @@ std::unique_ptr<column> is_valid(cudf::column_view const& input,
 std::unique_ptr<column> is_null(cudf::column_view const& input, rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::is_null(input, cudf::default_stream_value, mr);
+  return detail::is_null(input, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> is_valid(cudf::column_view const& input,
                                  rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::is_valid(input, cudf::default_stream_value, mr);
+  return detail::is_valid(input, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/utilities/default_stream.cpp b/cpp/src/utilities/default_stream.cpp
index d580972bc97..c21436abdb9 100644
--- a/cpp/src/utilities/default_stream.cpp
+++ b/cpp/src/utilities/default_stream.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -18,6 +18,16 @@
 
 namespace cudf {
 
+namespace detail {
+
+#if defined(CUDF_USE_PER_THREAD_DEFAULT_STREAM)
+rmm::cuda_stream_view const default_stream_value{rmm::cuda_stream_per_thread};
+#else
+rmm::cuda_stream_view const default_stream_value{};
+#endif
+
+}  // namespace detail
+
 /**
  * @brief Check if per-thread default stream is enabled.
  *
@@ -32,4 +42,5 @@ bool is_ptds_enabled()
 #endif
 }
 
+rmm::cuda_stream_view const get_default_stream() { return detail::default_stream_value; }
 }  // namespace cudf
diff --git a/cpp/tests/bitmask/bitmask_tests.cpp b/cpp/tests/bitmask/bitmask_tests.cpp
index 048c6f9dfa2..e4fdf2ddabb 100644
--- a/cpp/tests/bitmask/bitmask_tests.cpp
+++ b/cpp/tests/bitmask/bitmask_tests.cpp
@@ -69,15 +69,15 @@ struct CountBitmaskTest : public cudf::test::BaseFixture {
 
 TEST_F(CountBitmaskTest, NullMask)
 {
-  EXPECT_THROW(cudf::detail::count_set_bits(nullptr, 0, 32, cudf::default_stream_value),
+  EXPECT_THROW(cudf::detail::count_set_bits(nullptr, 0, 32, cudf::get_default_stream()),
                cudf::logic_error);
-  EXPECT_EQ(32, cudf::detail::valid_count(nullptr, 0, 32, cudf::default_stream_value));
+  EXPECT_EQ(32, cudf::detail::valid_count(nullptr, 0, 32, cudf::get_default_stream()));
 
   std::vector<cudf::size_type> indices = {0, 32, 7, 25};
-  EXPECT_THROW(cudf::detail::segmented_count_set_bits(nullptr, indices, cudf::default_stream_value),
+  EXPECT_THROW(cudf::detail::segmented_count_set_bits(nullptr, indices, cudf::get_default_stream()),
                cudf::logic_error);
   auto valid_counts =
-    cudf::detail::segmented_valid_count(nullptr, indices, cudf::default_stream_value);
+    cudf::detail::segmented_valid_count(nullptr, indices, cudf::get_default_stream());
   EXPECT_THAT(valid_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{32, 18}));
 }
 
@@ -88,11 +88,11 @@ rmm::device_uvector<cudf::bitmask_type> make_mask(cudf::size_type size, bool fil
   if (!fill_valid) {
     return cudf::detail::make_zeroed_device_uvector_sync<cudf::bitmask_type>(size);
   } else {
-    auto ret = rmm::device_uvector<cudf::bitmask_type>(size, cudf::default_stream_value);
+    auto ret = rmm::device_uvector<cudf::bitmask_type>(size, cudf::get_default_stream());
     CUDF_CUDA_TRY(cudaMemsetAsync(ret.data(),
                                   ~cudf::bitmask_type{0},
                                   size * sizeof(cudf::bitmask_type),
-                                  cudf::default_stream_value.value()));
+                                  cudf::get_default_stream().value()));
     return ret;
   }
 }
@@ -100,244 +100,244 @@ rmm::device_uvector<cudf::bitmask_type> make_mask(cudf::size_type size, bool fil
 TEST_F(CountBitmaskTest, NegativeStart)
 {
   auto mask = make_mask(1);
-  EXPECT_THROW(cudf::detail::count_set_bits(mask.data(), -1, 32, cudf::default_stream_value),
+  EXPECT_THROW(cudf::detail::count_set_bits(mask.data(), -1, 32, cudf::get_default_stream()),
                cudf::logic_error);
-  EXPECT_THROW(cudf::detail::valid_count(mask.data(), -1, 32, cudf::default_stream_value),
+  EXPECT_THROW(cudf::detail::valid_count(mask.data(), -1, 32, cudf::get_default_stream()),
                cudf::logic_error);
 
   std::vector<cudf::size_type> indices = {0, 16, -1, 32};
   EXPECT_THROW(
-    cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::default_stream_value),
+    cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::get_default_stream()),
     cudf::logic_error);
   EXPECT_THROW(
-    cudf::detail::segmented_valid_count(mask.data(), indices, cudf::default_stream_value),
+    cudf::detail::segmented_valid_count(mask.data(), indices, cudf::get_default_stream()),
     cudf::logic_error);
 }
 
 TEST_F(CountBitmaskTest, StartLargerThanStop)
 {
   auto mask = make_mask(1);
-  EXPECT_THROW(cudf::detail::count_set_bits(mask.data(), 32, 31, cudf::default_stream_value),
+  EXPECT_THROW(cudf::detail::count_set_bits(mask.data(), 32, 31, cudf::get_default_stream()),
                cudf::logic_error);
-  EXPECT_THROW(cudf::detail::valid_count(mask.data(), 32, 31, cudf::default_stream_value),
+  EXPECT_THROW(cudf::detail::valid_count(mask.data(), 32, 31, cudf::get_default_stream()),
                cudf::logic_error);
 
   std::vector<cudf::size_type> indices = {0, 16, 31, 30};
   EXPECT_THROW(
-    cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::default_stream_value),
+    cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::get_default_stream()),
     cudf::logic_error);
   EXPECT_THROW(
-    cudf::detail::segmented_valid_count(mask.data(), indices, cudf::default_stream_value),
+    cudf::detail::segmented_valid_count(mask.data(), indices, cudf::get_default_stream()),
     cudf::logic_error);
 }
 
 TEST_F(CountBitmaskTest, EmptyRange)
 {
   auto mask = make_mask(1);
-  EXPECT_EQ(0, cudf::detail::count_set_bits(mask.data(), 17, 17, cudf::default_stream_value));
-  EXPECT_EQ(0, cudf::detail::valid_count(mask.data(), 17, 17, cudf::default_stream_value));
+  EXPECT_EQ(0, cudf::detail::count_set_bits(mask.data(), 17, 17, cudf::get_default_stream()));
+  EXPECT_EQ(0, cudf::detail::valid_count(mask.data(), 17, 17, cudf::get_default_stream()));
 
   std::vector<cudf::size_type> indices = {0, 0, 17, 17};
   auto set_counts =
-    cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(set_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{0, 0}));
   auto valid_counts =
-    cudf::detail::segmented_valid_count(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_valid_count(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(valid_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{0, 0}));
 }
 
 TEST_F(CountBitmaskTest, SingleWordAllZero)
 {
   auto mask = make_mask(1);
-  EXPECT_EQ(0, cudf::detail::count_set_bits(mask.data(), 0, 32, cudf::default_stream_value));
-  EXPECT_EQ(0, cudf::detail::valid_count(mask.data(), 0, 32, cudf::default_stream_value));
+  EXPECT_EQ(0, cudf::detail::count_set_bits(mask.data(), 0, 32, cudf::get_default_stream()));
+  EXPECT_EQ(0, cudf::detail::valid_count(mask.data(), 0, 32, cudf::get_default_stream()));
 
   std::vector<cudf::size_type> indices = {0, 32, 0, 32};
   auto set_counts =
-    cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(set_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{0, 0}));
   auto valid_counts =
-    cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(valid_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{0, 0}));
 }
 
 TEST_F(CountBitmaskTest, SingleBitAllZero)
 {
   auto mask = make_mask(1);
-  EXPECT_EQ(0, cudf::detail::count_set_bits(mask.data(), 17, 18, cudf::default_stream_value));
-  EXPECT_EQ(0, cudf::detail::valid_count(mask.data(), 17, 18, cudf::default_stream_value));
+  EXPECT_EQ(0, cudf::detail::count_set_bits(mask.data(), 17, 18, cudf::get_default_stream()));
+  EXPECT_EQ(0, cudf::detail::valid_count(mask.data(), 17, 18, cudf::get_default_stream()));
 
   std::vector<cudf::size_type> indices = {17, 18, 7, 8};
   auto set_counts =
-    cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(set_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{0, 0}));
   auto valid_counts =
-    cudf::detail::segmented_valid_count(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_valid_count(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(valid_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{0, 0}));
 }
 
 TEST_F(CountBitmaskTest, SingleBitAllSet)
 {
   auto mask = make_mask(1, true);
-  EXPECT_EQ(1, cudf::detail::count_set_bits(mask.data(), 13, 14, cudf::default_stream_value));
-  EXPECT_EQ(1, cudf::detail::valid_count(mask.data(), 13, 14, cudf::default_stream_value));
+  EXPECT_EQ(1, cudf::detail::count_set_bits(mask.data(), 13, 14, cudf::get_default_stream()));
+  EXPECT_EQ(1, cudf::detail::valid_count(mask.data(), 13, 14, cudf::get_default_stream()));
 
   std::vector<cudf::size_type> indices = {13, 14, 0, 1};
   auto set_counts =
-    cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(set_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{1, 1}));
   auto valid_counts =
-    cudf::detail::segmented_valid_count(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_valid_count(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(valid_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{1, 1}));
 }
 
 TEST_F(CountBitmaskTest, SingleWordAllBitsSet)
 {
   auto mask = make_mask(1, true);
-  EXPECT_EQ(32, cudf::detail::count_set_bits(mask.data(), 0, 32, cudf::default_stream_value));
-  EXPECT_EQ(32, cudf::detail::valid_count(mask.data(), 0, 32, cudf::default_stream_value));
+  EXPECT_EQ(32, cudf::detail::count_set_bits(mask.data(), 0, 32, cudf::get_default_stream()));
+  EXPECT_EQ(32, cudf::detail::valid_count(mask.data(), 0, 32, cudf::get_default_stream()));
 
   std::vector<cudf::size_type> indices = {0, 32, 0, 32};
   auto set_counts =
-    cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(set_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{32, 32}));
   auto valid_counts =
-    cudf::detail::segmented_valid_count(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_valid_count(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(valid_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{32, 32}));
 }
 
 TEST_F(CountBitmaskTest, SingleWordPreSlack)
 {
   auto mask = make_mask(1, true);
-  EXPECT_EQ(25, cudf::detail::count_set_bits(mask.data(), 7, 32, cudf::default_stream_value));
-  EXPECT_EQ(25, cudf::detail::valid_count(mask.data(), 7, 32, cudf::default_stream_value));
+  EXPECT_EQ(25, cudf::detail::count_set_bits(mask.data(), 7, 32, cudf::get_default_stream()));
+  EXPECT_EQ(25, cudf::detail::valid_count(mask.data(), 7, 32, cudf::get_default_stream()));
 
   std::vector<cudf::size_type> indices = {7, 32, 8, 32};
   auto set_counts =
-    cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(set_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{25, 24}));
   auto valid_counts =
-    cudf::detail::segmented_valid_count(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_valid_count(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(valid_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{25, 24}));
 }
 
 TEST_F(CountBitmaskTest, SingleWordPostSlack)
 {
   auto mask = make_mask(1, true);
-  EXPECT_EQ(17, cudf::detail::count_set_bits(mask.data(), 0, 17, cudf::default_stream_value));
-  EXPECT_EQ(17, cudf::detail::valid_count(mask.data(), 0, 17, cudf::default_stream_value));
+  EXPECT_EQ(17, cudf::detail::count_set_bits(mask.data(), 0, 17, cudf::get_default_stream()));
+  EXPECT_EQ(17, cudf::detail::valid_count(mask.data(), 0, 17, cudf::get_default_stream()));
 
   std::vector<cudf::size_type> indices = {0, 17, 0, 18};
   auto set_counts =
-    cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(set_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{17, 18}));
   auto valid_counts =
-    cudf::detail::segmented_valid_count(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_valid_count(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(valid_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{17, 18}));
 }
 
 TEST_F(CountBitmaskTest, SingleWordSubset)
 {
   auto mask = make_mask(1, true);
-  EXPECT_EQ(30, cudf::detail::count_set_bits(mask.data(), 1, 31, cudf::default_stream_value));
-  EXPECT_EQ(30, cudf::detail::valid_count(mask.data(), 1, 31, cudf::default_stream_value));
+  EXPECT_EQ(30, cudf::detail::count_set_bits(mask.data(), 1, 31, cudf::get_default_stream()));
+  EXPECT_EQ(30, cudf::detail::valid_count(mask.data(), 1, 31, cudf::get_default_stream()));
 
   std::vector<cudf::size_type> indices = {1, 31, 7, 17};
   auto set_counts =
-    cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(set_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{30, 10}));
   auto valid_counts =
-    cudf::detail::segmented_valid_count(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_valid_count(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(valid_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{30, 10}));
 }
 
 TEST_F(CountBitmaskTest, SingleWordSubset2)
 {
   auto mask = make_mask(1, true);
-  EXPECT_EQ(28, cudf::detail::count_set_bits(mask.data(), 2, 30, cudf::default_stream_value));
-  EXPECT_EQ(28, cudf::detail::valid_count(mask.data(), 2, 30, cudf::default_stream_value));
+  EXPECT_EQ(28, cudf::detail::count_set_bits(mask.data(), 2, 30, cudf::get_default_stream()));
+  EXPECT_EQ(28, cudf::detail::valid_count(mask.data(), 2, 30, cudf::get_default_stream()));
 
   std::vector<cudf::size_type> indices = {4, 16, 2, 30};
   auto set_counts =
-    cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(set_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{12, 28}));
   auto valid_counts =
-    cudf::detail::segmented_valid_count(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_valid_count(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(valid_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{12, 28}));
 }
 
 TEST_F(CountBitmaskTest, MultipleWordsAllBits)
 {
   auto mask = make_mask(10, true);
-  EXPECT_EQ(320, cudf::detail::count_set_bits(mask.data(), 0, 320, cudf::default_stream_value));
-  EXPECT_EQ(320, cudf::detail::valid_count(mask.data(), 0, 320, cudf::default_stream_value));
+  EXPECT_EQ(320, cudf::detail::count_set_bits(mask.data(), 0, 320, cudf::get_default_stream()));
+  EXPECT_EQ(320, cudf::detail::valid_count(mask.data(), 0, 320, cudf::get_default_stream()));
 
   std::vector<cudf::size_type> indices = {0, 320, 0, 320};
   auto set_counts =
-    cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(set_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{320, 320}));
   auto valid_counts =
-    cudf::detail::segmented_valid_count(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_valid_count(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(valid_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{320, 320}));
 }
 
 TEST_F(CountBitmaskTest, MultipleWordsSubsetWordBoundary)
 {
   auto mask = make_mask(10, true);
-  EXPECT_EQ(256, cudf::detail::count_set_bits(mask.data(), 32, 288, cudf::default_stream_value));
-  EXPECT_EQ(256, cudf::detail::valid_count(mask.data(), 32, 288, cudf::default_stream_value));
+  EXPECT_EQ(256, cudf::detail::count_set_bits(mask.data(), 32, 288, cudf::get_default_stream()));
+  EXPECT_EQ(256, cudf::detail::valid_count(mask.data(), 32, 288, cudf::get_default_stream()));
 
   std::vector<cudf::size_type> indices = {32, 192, 32, 288};
   auto set_counts =
-    cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(set_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{160, 256}));
   auto valid_counts =
-    cudf::detail::segmented_valid_count(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_valid_count(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(valid_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{160, 256}));
 }
 
 TEST_F(CountBitmaskTest, MultipleWordsSplitWordBoundary)
 {
   auto mask = make_mask(10, true);
-  EXPECT_EQ(2, cudf::detail::count_set_bits(mask.data(), 31, 33, cudf::default_stream_value));
-  EXPECT_EQ(2, cudf::detail::valid_count(mask.data(), 31, 33, cudf::default_stream_value));
+  EXPECT_EQ(2, cudf::detail::count_set_bits(mask.data(), 31, 33, cudf::get_default_stream()));
+  EXPECT_EQ(2, cudf::detail::valid_count(mask.data(), 31, 33, cudf::get_default_stream()));
 
   std::vector<cudf::size_type> indices = {31, 33, 60, 67};
   auto set_counts =
-    cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(set_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{2, 7}));
   auto valid_counts =
-    cudf::detail::segmented_valid_count(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_valid_count(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(valid_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{2, 7}));
 }
 
 TEST_F(CountBitmaskTest, MultipleWordsSubset)
 {
   auto mask = make_mask(10, true);
-  EXPECT_EQ(226, cudf::detail::count_set_bits(mask.data(), 67, 293, cudf::default_stream_value));
-  EXPECT_EQ(226, cudf::detail::valid_count(mask.data(), 67, 293, cudf::default_stream_value));
+  EXPECT_EQ(226, cudf::detail::count_set_bits(mask.data(), 67, 293, cudf::get_default_stream()));
+  EXPECT_EQ(226, cudf::detail::valid_count(mask.data(), 67, 293, cudf::get_default_stream()));
 
   std::vector<cudf::size_type> indices = {67, 293, 37, 319};
   auto set_counts =
-    cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(set_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{226, 282}));
   auto valid_counts =
-    cudf::detail::segmented_valid_count(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_valid_count(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(valid_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{226, 282}));
 }
 
 TEST_F(CountBitmaskTest, MultipleWordsSingleBit)
 {
   auto mask = make_mask(10, true);
-  EXPECT_EQ(1, cudf::detail::count_set_bits(mask.data(), 67, 68, cudf::default_stream_value));
-  EXPECT_EQ(1, cudf::detail::valid_count(mask.data(), 67, 68, cudf::default_stream_value));
+  EXPECT_EQ(1, cudf::detail::count_set_bits(mask.data(), 67, 68, cudf::get_default_stream()));
+  EXPECT_EQ(1, cudf::detail::valid_count(mask.data(), 67, 68, cudf::get_default_stream()));
 
   std::vector<cudf::size_type> indices = {67, 68, 31, 32, 192, 193};
   auto set_counts =
-    cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_count_set_bits(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(set_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{1, 1, 1}));
   auto valid_counts =
-    cudf::detail::segmented_valid_count(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_valid_count(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(valid_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{1, 1, 1}));
 }
 
@@ -346,180 +346,180 @@ using CountUnsetBitsTest = CountBitmaskTest;
 TEST_F(CountUnsetBitsTest, SingleBitAllSet)
 {
   auto mask = make_mask(1, true);
-  EXPECT_EQ(0, cudf::detail::count_unset_bits(mask.data(), 13, 14, cudf::default_stream_value));
-  EXPECT_EQ(0, cudf::detail::null_count(mask.data(), 13, 14, cudf::default_stream_value));
+  EXPECT_EQ(0, cudf::detail::count_unset_bits(mask.data(), 13, 14, cudf::get_default_stream()));
+  EXPECT_EQ(0, cudf::detail::null_count(mask.data(), 13, 14, cudf::get_default_stream()));
 
   std::vector<cudf::size_type> indices = {13, 14, 31, 32};
   auto unset_counts =
-    cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(unset_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{0, 0}));
   auto null_counts =
-    cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(null_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{0, 0}));
 }
 
 TEST_F(CountUnsetBitsTest, NullMask)
 {
-  EXPECT_THROW(cudf::detail::count_unset_bits(nullptr, 0, 32, cudf::default_stream_value),
+  EXPECT_THROW(cudf::detail::count_unset_bits(nullptr, 0, 32, cudf::get_default_stream()),
                cudf::logic_error);
-  EXPECT_EQ(0, cudf::detail::null_count(nullptr, 0, 32, cudf::default_stream_value));
+  EXPECT_EQ(0, cudf::detail::null_count(nullptr, 0, 32, cudf::get_default_stream()));
 
   std::vector<cudf::size_type> indices = {0, 32, 7, 25};
   EXPECT_THROW(
-    cudf::detail::segmented_count_unset_bits(nullptr, indices, cudf::default_stream_value),
+    cudf::detail::segmented_count_unset_bits(nullptr, indices, cudf::get_default_stream()),
     cudf::logic_error);
   auto null_counts =
-    cudf::detail::segmented_null_count(nullptr, indices, cudf::default_stream_value);
+    cudf::detail::segmented_null_count(nullptr, indices, cudf::get_default_stream());
   EXPECT_THAT(null_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{0, 0}));
 }
 
 TEST_F(CountUnsetBitsTest, SingleWordAllBits)
 {
   auto mask = make_mask(1);
-  EXPECT_EQ(32, cudf::detail::count_unset_bits(mask.data(), 0, 32, cudf::default_stream_value));
-  EXPECT_EQ(32, cudf::detail::null_count(mask.data(), 0, 32, cudf::default_stream_value));
+  EXPECT_EQ(32, cudf::detail::count_unset_bits(mask.data(), 0, 32, cudf::get_default_stream()));
+  EXPECT_EQ(32, cudf::detail::null_count(mask.data(), 0, 32, cudf::get_default_stream()));
 
   std::vector<cudf::size_type> indices = {0, 32, 0, 32};
   auto unset_counts =
-    cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(unset_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{32, 32}));
   auto null_counts =
-    cudf::detail::segmented_null_count(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_null_count(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(null_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{32, 32}));
 }
 
 TEST_F(CountUnsetBitsTest, SingleWordPreSlack)
 {
   auto mask = make_mask(1);
-  EXPECT_EQ(25, cudf::detail::count_unset_bits(mask.data(), 7, 32, cudf::default_stream_value));
-  EXPECT_EQ(25, cudf::detail::null_count(mask.data(), 7, 32, cudf::default_stream_value));
+  EXPECT_EQ(25, cudf::detail::count_unset_bits(mask.data(), 7, 32, cudf::get_default_stream()));
+  EXPECT_EQ(25, cudf::detail::null_count(mask.data(), 7, 32, cudf::get_default_stream()));
 
   std::vector<cudf::size_type> indices = {7, 32, 8, 32};
   auto unset_counts =
-    cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(unset_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{25, 24}));
   auto null_counts =
-    cudf::detail::segmented_null_count(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_null_count(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(null_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{25, 24}));
 }
 
 TEST_F(CountUnsetBitsTest, SingleWordPostSlack)
 {
   auto mask = make_mask(1);
-  EXPECT_EQ(17, cudf::detail::count_unset_bits(mask.data(), 0, 17, cudf::default_stream_value));
-  EXPECT_EQ(17, cudf::detail::null_count(mask.data(), 0, 17, cudf::default_stream_value));
+  EXPECT_EQ(17, cudf::detail::count_unset_bits(mask.data(), 0, 17, cudf::get_default_stream()));
+  EXPECT_EQ(17, cudf::detail::null_count(mask.data(), 0, 17, cudf::get_default_stream()));
 
   std::vector<cudf::size_type> indices = {0, 17, 0, 18};
   auto unset_counts =
-    cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(unset_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{17, 18}));
   auto null_counts =
-    cudf::detail::segmented_null_count(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_null_count(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(null_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{17, 18}));
 }
 
 TEST_F(CountUnsetBitsTest, SingleWordSubset)
 {
   auto mask = make_mask(1);
-  EXPECT_EQ(30, cudf::detail::count_unset_bits(mask.data(), 1, 31, cudf::default_stream_value));
-  EXPECT_EQ(30, cudf::detail::null_count(mask.data(), 1, 31, cudf::default_stream_value));
+  EXPECT_EQ(30, cudf::detail::count_unset_bits(mask.data(), 1, 31, cudf::get_default_stream()));
+  EXPECT_EQ(30, cudf::detail::null_count(mask.data(), 1, 31, cudf::get_default_stream()));
 
   std::vector<cudf::size_type> indices = {1, 31, 7, 17};
   auto unset_counts =
-    cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(unset_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{30, 10}));
   auto null_counts =
-    cudf::detail::segmented_null_count(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_null_count(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(null_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{30, 10}));
 }
 
 TEST_F(CountUnsetBitsTest, SingleWordSubset2)
 {
   auto mask = make_mask(1);
-  EXPECT_EQ(28, cudf::detail::count_unset_bits(mask.data(), 2, 30, cudf::default_stream_value));
-  EXPECT_EQ(28, cudf::detail::null_count(mask.data(), 2, 30, cudf::default_stream_value));
+  EXPECT_EQ(28, cudf::detail::count_unset_bits(mask.data(), 2, 30, cudf::get_default_stream()));
+  EXPECT_EQ(28, cudf::detail::null_count(mask.data(), 2, 30, cudf::get_default_stream()));
 
   std::vector<cudf::size_type> indices = {4, 16, 2, 30};
   auto unset_counts =
-    cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(unset_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{12, 28}));
   auto null_counts =
-    cudf::detail::segmented_null_count(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_null_count(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(null_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{12, 28}));
 }
 
 TEST_F(CountUnsetBitsTest, MultipleWordsAllBits)
 {
   auto mask = make_mask(10);
-  EXPECT_EQ(320, cudf::detail::count_unset_bits(mask.data(), 0, 320, cudf::default_stream_value));
-  EXPECT_EQ(320, cudf::detail::null_count(mask.data(), 0, 320, cudf::default_stream_value));
+  EXPECT_EQ(320, cudf::detail::count_unset_bits(mask.data(), 0, 320, cudf::get_default_stream()));
+  EXPECT_EQ(320, cudf::detail::null_count(mask.data(), 0, 320, cudf::get_default_stream()));
 
   std::vector<cudf::size_type> indices = {0, 320, 0, 320};
   auto unset_counts =
-    cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(unset_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{320, 320}));
   auto null_counts =
-    cudf::detail::segmented_null_count(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_null_count(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(null_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{320, 320}));
 }
 
 TEST_F(CountUnsetBitsTest, MultipleWordsSubsetWordBoundary)
 {
   auto mask = make_mask(10);
-  EXPECT_EQ(256, cudf::detail::count_unset_bits(mask.data(), 32, 288, cudf::default_stream_value));
-  EXPECT_EQ(256, cudf::detail::null_count(mask.data(), 32, 288, cudf::default_stream_value));
+  EXPECT_EQ(256, cudf::detail::count_unset_bits(mask.data(), 32, 288, cudf::get_default_stream()));
+  EXPECT_EQ(256, cudf::detail::null_count(mask.data(), 32, 288, cudf::get_default_stream()));
 
   std::vector<cudf::size_type> indices = {32, 192, 32, 288};
   auto unset_counts =
-    cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(unset_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{160, 256}));
   auto null_counts =
-    cudf::detail::segmented_null_count(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_null_count(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(null_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{160, 256}));
 }
 
 TEST_F(CountUnsetBitsTest, MultipleWordsSplitWordBoundary)
 {
   auto mask = make_mask(10);
-  EXPECT_EQ(2, cudf::detail::count_unset_bits(mask.data(), 31, 33, cudf::default_stream_value));
-  EXPECT_EQ(2, cudf::detail::null_count(mask.data(), 31, 33, cudf::default_stream_value));
+  EXPECT_EQ(2, cudf::detail::count_unset_bits(mask.data(), 31, 33, cudf::get_default_stream()));
+  EXPECT_EQ(2, cudf::detail::null_count(mask.data(), 31, 33, cudf::get_default_stream()));
 
   std::vector<cudf::size_type> indices = {31, 33, 60, 67};
   auto unset_counts =
-    cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(unset_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{2, 7}));
   auto null_counts =
-    cudf::detail::segmented_null_count(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_null_count(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(null_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{2, 7}));
 }
 
 TEST_F(CountUnsetBitsTest, MultipleWordsSubset)
 {
   auto mask = make_mask(10);
-  EXPECT_EQ(226, cudf::detail::count_unset_bits(mask.data(), 67, 293, cudf::default_stream_value));
-  EXPECT_EQ(226, cudf::detail::null_count(mask.data(), 67, 293, cudf::default_stream_value));
+  EXPECT_EQ(226, cudf::detail::count_unset_bits(mask.data(), 67, 293, cudf::get_default_stream()));
+  EXPECT_EQ(226, cudf::detail::null_count(mask.data(), 67, 293, cudf::get_default_stream()));
 
   std::vector<cudf::size_type> indices = {67, 293, 37, 319};
   auto unset_counts =
-    cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(unset_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{226, 282}));
   auto null_counts =
-    cudf::detail::segmented_null_count(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_null_count(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(null_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{226, 282}));
 }
 
 TEST_F(CountUnsetBitsTest, MultipleWordsSingleBit)
 {
   auto mask = make_mask(10);
-  EXPECT_EQ(1, cudf::detail::count_unset_bits(mask.data(), 67, 68, cudf::default_stream_value));
-  EXPECT_EQ(1, cudf::detail::null_count(mask.data(), 67, 68, cudf::default_stream_value));
+  EXPECT_EQ(1, cudf::detail::count_unset_bits(mask.data(), 67, 68, cudf::get_default_stream()));
+  EXPECT_EQ(1, cudf::detail::null_count(mask.data(), 67, 68, cudf::get_default_stream()));
 
   std::vector<cudf::size_type> indices = {67, 68, 31, 32, 192, 193};
   auto unset_counts =
-    cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_count_unset_bits(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(unset_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{1, 1, 1}));
   auto null_counts =
-    cudf::detail::segmented_null_count(mask.data(), indices, cudf::default_stream_value);
+    cudf::detail::segmented_null_count(mask.data(), indices, cudf::get_default_stream());
   EXPECT_THAT(null_counts, ::testing::ElementsAreArray(std::vector<cudf::size_type>{1, 1, 1}));
 }
 
@@ -622,10 +622,10 @@ TEST_F(CopyBitmaskTest, TestCopyColumnViewVectorContiguous)
   }
   auto gold_mask = cudf::test::detail::make_null_mask(validity_bit.begin(), validity_bit.end());
 
-  rmm::device_buffer copy_mask{gold_mask, cudf::default_stream_value};
+  rmm::device_buffer copy_mask{gold_mask, cudf::get_default_stream()};
   cudf::column original{t,
                         num_elements,
-                        rmm::device_buffer{num_elements * sizeof(int), cudf::default_stream_value},
+                        rmm::device_buffer{num_elements * sizeof(int), cudf::get_default_stream()},
                         std::move(copy_mask)};
   std::vector<cudf::size_type> indices{0,
                                        104,
@@ -669,7 +669,7 @@ TEST_F(CopyBitmaskTest, TestCopyColumnViewVectorDiscontiguous)
     cols.emplace_back(
       t,
       split[i + 1] - split[i],
-      rmm::device_buffer{sizeof(int) * (split[i + 1] - split[i]), cudf::default_stream_value},
+      rmm::device_buffer{sizeof(int) * (split[i + 1] - split[i]), cudf::get_default_stream()},
       cudf::test::detail::make_null_mask(validity_bit.begin() + split[i],
                                          validity_bit.begin() + split[i + 1]));
     views.push_back(cols.back());
diff --git a/cpp/tests/bitmask/set_nullmask_tests.cu b/cpp/tests/bitmask/set_nullmask_tests.cu
index 1b7ffe2ff72..19e3202a6d7 100644
--- a/cpp/tests/bitmask/set_nullmask_tests.cu
+++ b/cpp/tests/bitmask/set_nullmask_tests.cu
@@ -50,7 +50,7 @@ struct SetBitmaskTest : public cudf::test::BaseFixture {
   void expect_bitmask_equal(cudf::bitmask_type const* bitmask,  // Device Ptr
                             cudf::size_type start_bit,
                             thrust::host_vector<bool> const& expect,
-                            rmm::cuda_stream_view stream = cudf::default_stream_value)
+                            rmm::cuda_stream_view stream = cudf::get_default_stream())
   {
     rmm::device_uvector<bool> result(expect.size(), stream);
     auto counting_iter = thrust::counting_iterator<cudf::size_type>{0};
diff --git a/cpp/tests/column/column_device_view_test.cu b/cpp/tests/column/column_device_view_test.cu
index c317ddec8b7..0868ad885cf 100644
--- a/cpp/tests/column/column_device_view_test.cu
+++ b/cpp/tests/column/column_device_view_test.cu
@@ -37,7 +37,7 @@ struct ColumnDeviceViewTest : public cudf::test::BaseFixture {
 TEST_F(ColumnDeviceViewTest, Sample)
 {
   using T = int32_t;
-  rmm::cuda_stream_view stream{cudf::default_stream_value};
+  rmm::cuda_stream_view stream{cudf::get_default_stream()};
   cudf::test::fixed_width_column_wrapper<T> input({1, 2, 3, 4, 5, 6});
   auto output            = cudf::allocate_like(input);
   auto input_device_view = cudf::column_device_view::create(input, stream);
@@ -55,7 +55,7 @@ TEST_F(ColumnDeviceViewTest, Sample)
 TEST_F(ColumnDeviceViewTest, MismatchingType)
 {
   using T = int32_t;
-  rmm::cuda_stream_view stream{cudf::default_stream_value};
+  rmm::cuda_stream_view stream{cudf::get_default_stream()};
   cudf::test::fixed_width_column_wrapper<T> input({1, 2, 3, 4, 5, 6});
   auto output            = cudf::allocate_like(input);
   auto input_device_view = cudf::column_device_view::create(input, stream);
diff --git a/cpp/tests/column/column_test.cu b/cpp/tests/column/column_test.cu
index 51f37ecac6c..b31c38e4187 100644
--- a/cpp/tests/column/column_test.cu
+++ b/cpp/tests/column/column_test.cu
@@ -43,15 +43,15 @@ struct TypedColumnTest : public cudf::test::BaseFixture {
   cudf::data_type type() { return cudf::data_type{cudf::type_to_id<T>()}; }
 
   TypedColumnTest()
-    : data{_num_elements * cudf::size_of(type()), cudf::default_stream_value},
-      mask{cudf::bitmask_allocation_size_bytes(_num_elements), cudf::default_stream_value}
+    : data{_num_elements * cudf::size_of(type()), cudf::get_default_stream()},
+      mask{cudf::bitmask_allocation_size_bytes(_num_elements), cudf::get_default_stream()}
   {
     auto typed_data = static_cast<char*>(data.data());
     auto typed_mask = static_cast<char*>(mask.data());
     thrust::sequence(
-      rmm::exec_policy(cudf::default_stream_value), typed_data, typed_data + data.size());
+      rmm::exec_policy(cudf::get_default_stream()), typed_data, typed_data + data.size());
     thrust::sequence(
-      rmm::exec_policy(cudf::default_stream_value), typed_mask, typed_mask + mask.size());
+      rmm::exec_policy(cudf::get_default_stream()), typed_mask, typed_mask + mask.size());
   }
 
   cudf::size_type num_elements() { return _num_elements; }
@@ -247,8 +247,8 @@ TYPED_TEST(TypedColumnTest, CopyDataAndMask)
 {
   cudf::column col{this->type(),
                    this->num_elements(),
-                   rmm::device_buffer{this->data, cudf::default_stream_value},
-                   rmm::device_buffer{this->all_valid_mask, cudf::default_stream_value}};
+                   rmm::device_buffer{this->data, cudf::get_default_stream()},
+                   rmm::device_buffer{this->all_valid_mask, cudf::get_default_stream()}};
   EXPECT_EQ(this->type(), col.type());
   EXPECT_TRUE(col.nullable());
   EXPECT_EQ(0, col.null_count());
@@ -352,8 +352,8 @@ TYPED_TEST(TypedColumnTest, MoveConstructorWithMask)
 TYPED_TEST(TypedColumnTest, DeviceUvectorConstructorNoMask)
 {
   rmm::device_uvector<TypeParam> original{static_cast<std::size_t>(this->num_elements()),
-                                          cudf::default_stream_value};
-  thrust::copy(rmm::exec_policy(cudf::default_stream_value),
+                                          cudf::get_default_stream()};
+  thrust::copy(rmm::exec_policy(cudf::get_default_stream()),
                static_cast<TypeParam*>(this->data.data()),
                static_cast<TypeParam*>(this->data.data()) + this->num_elements(),
                original.begin());
@@ -369,8 +369,8 @@ TYPED_TEST(TypedColumnTest, DeviceUvectorConstructorNoMask)
 TYPED_TEST(TypedColumnTest, DeviceUvectorConstructorWithMask)
 {
   rmm::device_uvector<TypeParam> original{static_cast<std::size_t>(this->num_elements()),
-                                          cudf::default_stream_value};
-  thrust::copy(rmm::exec_policy(cudf::default_stream_value),
+                                          cudf::get_default_stream()};
+  thrust::copy(rmm::exec_policy(cudf::get_default_stream()),
                static_cast<TypeParam*>(this->data.data()),
                static_cast<TypeParam*>(this->data.data()) + this->num_elements(),
                original.begin());
@@ -392,17 +392,17 @@ TYPED_TEST(TypedColumnTest, ConstructWithChildren)
   children.emplace_back(std::make_unique<cudf::column>(
     cudf::data_type{cudf::type_id::INT8},
     42,
-    rmm::device_buffer{this->data, cudf::default_stream_value},
-    rmm::device_buffer{this->all_valid_mask, cudf::default_stream_value}));
+    rmm::device_buffer{this->data, cudf::get_default_stream()},
+    rmm::device_buffer{this->all_valid_mask, cudf::get_default_stream()}));
   children.emplace_back(std::make_unique<cudf::column>(
     cudf::data_type{cudf::type_id::FLOAT64},
     314,
-    rmm::device_buffer{this->data, cudf::default_stream_value},
-    rmm::device_buffer{this->all_valid_mask, cudf::default_stream_value}));
+    rmm::device_buffer{this->data, cudf::get_default_stream()},
+    rmm::device_buffer{this->all_valid_mask, cudf::get_default_stream()}));
   cudf::column col{this->type(),
                    this->num_elements(),
-                   rmm::device_buffer{this->data, cudf::default_stream_value},
-                   rmm::device_buffer{this->all_valid_mask, cudf::default_stream_value},
+                   rmm::device_buffer{this->data, cudf::get_default_stream()},
+                   rmm::device_buffer{this->all_valid_mask, cudf::get_default_stream()},
                    cudf::UNKNOWN_NULL_COUNT,
                    std::move(children)};
 
@@ -437,17 +437,17 @@ TYPED_TEST(TypedColumnTest, ReleaseWithChildren)
   children.emplace_back(std::make_unique<cudf::column>(
     this->type(),
     this->num_elements(),
-    rmm::device_buffer{this->data, cudf::default_stream_value},
-    rmm::device_buffer{this->all_valid_mask, cudf::default_stream_value}));
+    rmm::device_buffer{this->data, cudf::get_default_stream()},
+    rmm::device_buffer{this->all_valid_mask, cudf::get_default_stream()}));
   children.emplace_back(std::make_unique<cudf::column>(
     this->type(),
     this->num_elements(),
-    rmm::device_buffer{this->data, cudf::default_stream_value},
-    rmm::device_buffer{this->all_valid_mask, cudf::default_stream_value}));
+    rmm::device_buffer{this->data, cudf::get_default_stream()},
+    rmm::device_buffer{this->all_valid_mask, cudf::get_default_stream()}));
   cudf::column col{this->type(),
                    this->num_elements(),
-                   rmm::device_buffer{this->data, cudf::default_stream_value},
-                   rmm::device_buffer{this->all_valid_mask, cudf::default_stream_value},
+                   rmm::device_buffer{this->data, cudf::get_default_stream()},
+                   rmm::device_buffer{this->all_valid_mask, cudf::get_default_stream()},
                    cudf::UNKNOWN_NULL_COUNT,
                    std::move(children)};
 
diff --git a/cpp/tests/column/compound_test.cu b/cpp/tests/column/compound_test.cu
index 58be2b2f316..79f746369f2 100644
--- a/cpp/tests/column/compound_test.cu
+++ b/cpp/tests/column/compound_test.cu
@@ -65,13 +65,13 @@ struct checker_for_level2 {
 
 TEST_F(CompoundColumnTest, ChildrenLevel1)
 {
-  rmm::device_uvector<int32_t> data(1000, cudf::default_stream_value);
-  thrust::sequence(rmm::exec_policy(cudf::default_stream_value), data.begin(), data.end(), 1);
+  rmm::device_uvector<int32_t> data(1000, cudf::get_default_stream());
+  thrust::sequence(rmm::exec_policy(cudf::get_default_stream()), data.begin(), data.end(), 1);
 
   auto null_mask = cudf::create_null_mask(100, cudf::mask_state::UNALLOCATED);
-  rmm::device_buffer data1{data.data() + 100, 100 * sizeof(int32_t), cudf::default_stream_value};
-  rmm::device_buffer data2{data.data() + 200, 100 * sizeof(int32_t), cudf::default_stream_value};
-  rmm::device_buffer data3{data.data() + 300, 100 * sizeof(int32_t), cudf::default_stream_value};
+  rmm::device_buffer data1{data.data() + 100, 100 * sizeof(int32_t), cudf::get_default_stream()};
+  rmm::device_buffer data2{data.data() + 200, 100 * sizeof(int32_t), cudf::get_default_stream()};
+  rmm::device_buffer data3{data.data() + 300, 100 * sizeof(int32_t), cudf::get_default_stream()};
   auto child1 =
     std::make_unique<cudf::column>(cudf::data_type{cudf::type_id::INT32},
                                    100,
@@ -105,14 +105,14 @@ TEST_F(CompoundColumnTest, ChildrenLevel1)
 
   {
     auto column = cudf::column_device_view::create(parent->view());
-    EXPECT_TRUE(thrust::any_of(rmm::exec_policy(cudf::default_stream_value),
+    EXPECT_TRUE(thrust::any_of(rmm::exec_policy(cudf::get_default_stream()),
                                thrust::make_counting_iterator<int32_t>(0),
                                thrust::make_counting_iterator<int32_t>(100),
                                checker_for_level1<cudf::column_device_view>{*column}));
   }
   {
     auto column = cudf::mutable_column_device_view::create(parent->mutable_view());
-    EXPECT_TRUE(thrust::any_of(rmm::exec_policy(cudf::default_stream_value),
+    EXPECT_TRUE(thrust::any_of(rmm::exec_policy(cudf::get_default_stream()),
                                thrust::make_counting_iterator<int32_t>(0),
                                thrust::make_counting_iterator<int32_t>(100),
                                checker_for_level1<cudf::mutable_column_device_view>{*column}));
@@ -121,16 +121,16 @@ TEST_F(CompoundColumnTest, ChildrenLevel1)
 
 TEST_F(CompoundColumnTest, ChildrenLevel2)
 {
-  rmm::device_uvector<int32_t> data(1000, cudf::default_stream_value);
-  thrust::sequence(rmm::exec_policy(cudf::default_stream_value), data.begin(), data.end(), 1);
+  rmm::device_uvector<int32_t> data(1000, cudf::get_default_stream());
+  thrust::sequence(rmm::exec_policy(cudf::get_default_stream()), data.begin(), data.end(), 1);
 
   auto null_mask = cudf::create_null_mask(100, cudf::mask_state::UNALLOCATED);
-  rmm::device_buffer data11{data.data() + 100, 100 * sizeof(int32_t), cudf::default_stream_value};
-  rmm::device_buffer data12{data.data() + 200, 100 * sizeof(int32_t), cudf::default_stream_value};
-  rmm::device_buffer data13{data.data() + 300, 100 * sizeof(int32_t), cudf::default_stream_value};
-  rmm::device_buffer data21{data.data() + 400, 100 * sizeof(int32_t), cudf::default_stream_value};
-  rmm::device_buffer data22{data.data() + 500, 100 * sizeof(int32_t), cudf::default_stream_value};
-  rmm::device_buffer data23{data.data() + 600, 100 * sizeof(int32_t), cudf::default_stream_value};
+  rmm::device_buffer data11{data.data() + 100, 100 * sizeof(int32_t), cudf::get_default_stream()};
+  rmm::device_buffer data12{data.data() + 200, 100 * sizeof(int32_t), cudf::get_default_stream()};
+  rmm::device_buffer data13{data.data() + 300, 100 * sizeof(int32_t), cudf::get_default_stream()};
+  rmm::device_buffer data21{data.data() + 400, 100 * sizeof(int32_t), cudf::get_default_stream()};
+  rmm::device_buffer data22{data.data() + 500, 100 * sizeof(int32_t), cudf::get_default_stream()};
+  rmm::device_buffer data23{data.data() + 600, 100 * sizeof(int32_t), cudf::get_default_stream()};
   auto gchild11 =
     std::make_unique<cudf::column>(cudf::data_type{cudf::type_id::INT32},
                                    100,
@@ -202,14 +202,14 @@ TEST_F(CompoundColumnTest, ChildrenLevel2)
 
   {
     auto column = cudf::column_device_view::create(parent->view());
-    EXPECT_TRUE(thrust::any_of(rmm::exec_policy(cudf::default_stream_value),
+    EXPECT_TRUE(thrust::any_of(rmm::exec_policy(cudf::get_default_stream()),
                                thrust::make_counting_iterator<int32_t>(0),
                                thrust::make_counting_iterator<int32_t>(100),
                                checker_for_level2<cudf::column_device_view>{*column}));
   }
   {
     auto column = cudf::mutable_column_device_view::create(parent->mutable_view());
-    EXPECT_TRUE(thrust::any_of(rmm::exec_policy(cudf::default_stream_value),
+    EXPECT_TRUE(thrust::any_of(rmm::exec_policy(cudf::get_default_stream()),
                                thrust::make_counting_iterator<int32_t>(0),
                                thrust::make_counting_iterator<int32_t>(100),
                                checker_for_level2<cudf::mutable_column_device_view>{*column}));
diff --git a/cpp/tests/column/factories_test.cpp b/cpp/tests/column/factories_test.cpp
index bd37da91f69..9386bb9a840 100644
--- a/cpp/tests/column/factories_test.cpp
+++ b/cpp/tests/column/factories_test.cpp
@@ -36,7 +36,7 @@ class ColumnFactoryTest : public cudf::test::BaseFixture {
 
  public:
   cudf::size_type size() { return _size; }
-  rmm::cuda_stream_view stream() { return cudf::default_stream_value; }
+  rmm::cuda_stream_view stream() { return cudf::get_default_stream(); }
 };
 
 template <typename T>
diff --git a/cpp/tests/copying/concatenate_tests.cu b/cpp/tests/copying/concatenate_tests.cu
index c06afe85cff..f01a17a0005 100644
--- a/cpp/tests/copying/concatenate_tests.cu
+++ b/cpp/tests/copying/concatenate_tests.cu
@@ -53,7 +53,7 @@ template <typename T>
 struct TypedColumnTest : public cudf::test::BaseFixture {
   cudf::data_type type() { return cudf::data_type{cudf::type_to_id<T>()}; }
 
-  TypedColumnTest(rmm::cuda_stream_view stream = cudf::default_stream_value)
+  TypedColumnTest(rmm::cuda_stream_view stream = cudf::get_default_stream())
     : data{_num_elements * cudf::size_of(type()), stream},
       mask{cudf::bitmask_allocation_size_bytes(_num_elements), stream}
   {
@@ -356,7 +356,7 @@ TEST_F(OverflowTest, OverflowTest)
     table_view tbl_last({*many_chars_last});
     std::vector<cudf::table_view> table_views_to_concat({tbl, tbl, tbl, tbl, tbl, tbl_last});
     std::unique_ptr<cudf::table> concatenated_tables = cudf::concatenate(table_views_to_concat);
-    EXPECT_NO_THROW(cudf::default_stream_value.synchronize());
+    EXPECT_NO_THROW(cudf::get_default_stream().synchronize());
     ASSERT_EQ(concatenated_tables->num_rows(), std::numeric_limits<size_type>::max());
   }
 
@@ -522,11 +522,11 @@ TEST_F(OverflowTest, Presliced)
 
     // try and concatenate 4 string columns of with ~1/2 billion chars in each
     auto offsets = cudf::make_fixed_width_column(data_type{type_id::INT32}, num_rows + 1);
-    thrust::fill(rmm::exec_policy(cudf::default_stream_value),
+    thrust::fill(rmm::exec_policy(cudf::get_default_stream()),
                  offsets->mutable_view().begin<offset_type>(),
                  offsets->mutable_view().end<offset_type>(),
                  string_size);
-    thrust::exclusive_scan(rmm::exec_policy(cudf::default_stream_value),
+    thrust::exclusive_scan(rmm::exec_policy(cudf::get_default_stream()),
                            offsets->view().begin<offset_type>(),
                            offsets->view().end<offset_type>(),
                            offsets->mutable_view().begin<offset_type>());
@@ -596,11 +596,11 @@ TEST_F(OverflowTest, Presliced)
 
     // try and concatenate 4 struct columns of with ~1/2 billion elements in each
     auto offsets = cudf::make_fixed_width_column(data_type{type_id::INT32}, num_rows + 1);
-    thrust::fill(rmm::exec_policy(cudf::default_stream_value),
+    thrust::fill(rmm::exec_policy(cudf::get_default_stream()),
                  offsets->mutable_view().begin<offset_type>(),
                  offsets->mutable_view().end<offset_type>(),
                  list_size);
-    thrust::exclusive_scan(rmm::exec_policy(cudf::default_stream_value),
+    thrust::exclusive_scan(rmm::exec_policy(cudf::get_default_stream()),
                            offsets->view().begin<offset_type>(),
                            offsets->view().end<offset_type>(),
                            offsets->mutable_view().begin<offset_type>());
@@ -688,11 +688,11 @@ TEST_F(OverflowTest, BigColumnsSmallSlices)
     constexpr size_type string_size = inner_size / num_rows;
 
     auto offsets = cudf::make_fixed_width_column(data_type{type_id::INT32}, num_rows + 1);
-    thrust::fill(rmm::exec_policy(cudf::default_stream_value),
+    thrust::fill(rmm::exec_policy(cudf::get_default_stream()),
                  offsets->mutable_view().begin<offset_type>(),
                  offsets->mutable_view().end<offset_type>(),
                  string_size);
-    thrust::exclusive_scan(rmm::exec_policy(cudf::default_stream_value),
+    thrust::exclusive_scan(rmm::exec_policy(cudf::get_default_stream()),
                            offsets->view().begin<offset_type>(),
                            offsets->view().end<offset_type>(),
                            offsets->mutable_view().begin<offset_type>());
@@ -715,11 +715,11 @@ TEST_F(OverflowTest, BigColumnsSmallSlices)
     constexpr size_type list_size = inner_size / num_rows;
 
     auto offsets = cudf::make_fixed_width_column(data_type{type_id::INT32}, num_rows + 1);
-    thrust::fill(rmm::exec_policy(cudf::default_stream_value),
+    thrust::fill(rmm::exec_policy(cudf::get_default_stream()),
                  offsets->mutable_view().begin<offset_type>(),
                  offsets->mutable_view().end<offset_type>(),
                  list_size);
-    thrust::exclusive_scan(rmm::exec_policy(cudf::default_stream_value),
+    thrust::exclusive_scan(rmm::exec_policy(cudf::get_default_stream()),
                            offsets->view().begin<offset_type>(),
                            offsets->view().end<offset_type>(),
                            offsets->mutable_view().begin<offset_type>());
@@ -742,11 +742,11 @@ TEST_F(OverflowTest, BigColumnsSmallSlices)
     constexpr size_type list_size = inner_size / num_rows;
 
     auto offsets = cudf::make_fixed_width_column(data_type{type_id::INT32}, num_rows + 1);
-    thrust::fill(rmm::exec_policy(cudf::default_stream_value),
+    thrust::fill(rmm::exec_policy(cudf::get_default_stream()),
                  offsets->mutable_view().begin<offset_type>(),
                  offsets->mutable_view().end<offset_type>(),
                  list_size);
-    thrust::exclusive_scan(rmm::exec_policy(cudf::default_stream_value),
+    thrust::exclusive_scan(rmm::exec_policy(cudf::get_default_stream()),
                            offsets->view().begin<offset_type>(),
                            offsets->view().end<offset_type>(),
                            offsets->mutable_view().begin<offset_type>());
diff --git a/cpp/tests/copying/detail_gather_tests.cu b/cpp/tests/copying/detail_gather_tests.cu
index 9cd74abce1c..08037b78a70 100644
--- a/cpp/tests/copying/detail_gather_tests.cu
+++ b/cpp/tests/copying/detail_gather_tests.cu
@@ -46,9 +46,9 @@ TYPED_TEST_SUITE(GatherTest, cudf::test::NumericTypes);
 TYPED_TEST(GatherTest, GatherDetailDeviceVectorTest)
 {
   constexpr cudf::size_type source_size{1000};
-  rmm::device_uvector<cudf::size_type> gather_map(source_size, cudf::default_stream_value);
+  rmm::device_uvector<cudf::size_type> gather_map(source_size, cudf::get_default_stream());
   thrust::sequence(
-    rmm::exec_policy(cudf::default_stream_value), gather_map.begin(), gather_map.end());
+    rmm::exec_policy(cudf::get_default_stream()), gather_map.begin(), gather_map.end());
 
   auto data = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i; });
   cudf::test::fixed_width_column_wrapper<TypeParam> source_column(data, data + source_size);
diff --git a/cpp/tests/copying/scatter_list_tests.cpp b/cpp/tests/copying/scatter_list_tests.cpp
index 179ab56fc40..d262cbccd61 100644
--- a/cpp/tests/copying/scatter_list_tests.cpp
+++ b/cpp/tests/copying/scatter_list_tests.cpp
@@ -67,7 +67,7 @@ TYPED_TEST(TypedScatterListsTest, SlicedInputLists)
   auto src_list_column =
     lists_column_wrapper<T, int32_t>{{0, 0, 0, 0}, {9, 9, 9, 9}, {8, 8, 8}, {7, 7, 7}}.release();
   auto src_sliced =
-    cudf::detail::slice(src_list_column->view(), {1, 3}, cudf::default_stream_value).front();
+    cudf::detail::slice(src_list_column->view(), {1, 3}, cudf::get_default_stream()).front();
 
   auto target_list_column =
     lists_column_wrapper<T, int32_t>{{0, 0}, {1, 1}, {2, 2}, {3, 3}, {4, 4}, {5, 5}, {6, 6}}
@@ -84,7 +84,7 @@ TYPED_TEST(TypedScatterListsTest, SlicedInputLists)
       {8, 8, 8}, {1, 1}, {9, 9, 9, 9}, {3, 3}, {4, 4}, {5, 5}, {6, 6}});
 
   auto target_sliced =
-    cudf::detail::slice(target_list_column->view(), {1, 6}, cudf::default_stream_value);
+    cudf::detail::slice(target_list_column->view(), {1, 6}, cudf::get_default_stream());
 
   auto ret_2 =
     cudf::scatter(cudf::table_view({src_sliced}), scatter_map, cudf::table_view({target_sliced}));
diff --git a/cpp/tests/copying/shift_tests.cpp b/cpp/tests/copying/shift_tests.cpp
index 3907afd10c0..288e1d3fec6 100644
--- a/cpp/tests/copying/shift_tests.cpp
+++ b/cpp/tests/copying/shift_tests.cpp
@@ -35,7 +35,7 @@ using TestTypes = cudf::test::Types<int32_t>;
 
 template <typename T, typename ScalarType = cudf::scalar_type_t<T>>
 std::unique_ptr<cudf::scalar> make_scalar(
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   auto s = new ScalarType(cudf::test::make_type_param_scalar<T>(0), false, stream, mr);
@@ -45,7 +45,7 @@ std::unique_ptr<cudf::scalar> make_scalar(
 template <typename T, typename ScalarType = cudf::scalar_type_t<T>>
 std::unique_ptr<cudf::scalar> make_scalar(
   T value,
-  rmm::cuda_stream_view stream        = cudf::default_stream_value,
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   auto s = new ScalarType(value, true, stream, mr);
diff --git a/cpp/tests/device_atomics/device_atomics_test.cu b/cpp/tests/device_atomics/device_atomics_test.cu
index 17e67da6227..fd43690dcff 100644
--- a/cpp/tests/device_atomics/device_atomics_test.cu
+++ b/cpp/tests/device_atomics/device_atomics_test.cu
@@ -147,16 +147,16 @@ struct AtomicsTest : public cudf::test::BaseFixture {
     if (block_size == 0) { block_size = vec_size; }
 
     if (is_cas_test) {
-      gpu_atomicCAS_test<<<grid_size, block_size, 0, cudf::default_stream_value.value()>>>(
+      gpu_atomicCAS_test<<<grid_size, block_size, 0, cudf::get_default_stream().value()>>>(
         dev_result.data(), dev_data.data(), vec_size);
     } else {
-      gpu_atomic_test<<<grid_size, block_size, 0, cudf::default_stream_value.value()>>>(
+      gpu_atomic_test<<<grid_size, block_size, 0, cudf::get_default_stream().value()>>>(
         dev_result.data(), dev_data.data(), vec_size);
     }
 
     auto host_result = cudf::detail::make_host_vector_sync(dev_result);
 
-    CUDF_CHECK_CUDA(cudf::default_stream_value.value());
+    CUDF_CHECK_CUDA(cudf::get_default_stream().value());
 
     if (!is_timestamp_sum<T, cudf::DeviceSum>()) {
       EXPECT_EQ(host_result[0], exact[0]) << "atomicAdd test failed";
@@ -298,12 +298,12 @@ struct AtomicsBitwiseOpTest : public cudf::test::BaseFixture {
 
     if (block_size == 0) { block_size = vec_size; }
 
-    gpu_atomic_bitwiseOp_test<T><<<grid_size, block_size, 0, cudf::default_stream_value.value()>>>(
+    gpu_atomic_bitwiseOp_test<T><<<grid_size, block_size, 0, cudf::get_default_stream().value()>>>(
       reinterpret_cast<T*>(dev_result.data()), reinterpret_cast<T*>(dev_data.data()), vec_size);
 
     auto host_result = cudf::detail::make_host_vector_sync(dev_result);
 
-    CUDF_CHECK_CUDA(cudf::default_stream_value.value());
+    CUDF_CHECK_CUDA(cudf::get_default_stream().value());
 
     // print_exact(exact, "exact");
     // print_exact(host_result.data(), "result");
diff --git a/cpp/tests/error/error_handling_test.cu b/cpp/tests/error/error_handling_test.cu
index e34cf23eee4..e83d961cd9b 100644
--- a/cpp/tests/error/error_handling_test.cu
+++ b/cpp/tests/error/error_handling_test.cu
@@ -90,7 +90,7 @@ TEST(DeathTest, CudaFatalError)
 {
   testing::FLAGS_gtest_death_test_style = "threadsafe";
   auto call_kernel                      = []() {
-    kernel<<<1, 1, 0, cudf::default_stream_value.value()>>>();
+    kernel<<<1, 1, 0, cudf::get_default_stream().value()>>>();
     try {
       CUDF_CUDA_TRY(cudaDeviceSynchronize());
     } catch (const cudf::fatal_cuda_error& fe) {
@@ -140,5 +140,12 @@ TEST(DebugAssert, cudf_assert_true)
 int main(int argc, char** argv)
 {
   ::testing::InitGoogleTest(&argc, argv);
+  auto const cmd_opts    = parse_cudf_test_opts(argc, argv);
+  auto const stream_mode = cmd_opts["stream_mode"].as<std::string>();
+  if (stream_mode == "custom") {
+    auto adapter = make_stream_checking_resource_adaptor(rmm::mr::get_current_device_resource());
+    rmm::mr::set_current_device_resource(&adapter);
+    return RUN_ALL_TESTS();  // run the tests before `adapter` goes out of scope
+  }
   return RUN_ALL_TESTS();
 }
diff --git a/cpp/tests/fixed_point/fixed_point_tests.cu b/cpp/tests/fixed_point/fixed_point_tests.cu
index a4e0736e22f..394229b460e 100644
--- a/cpp/tests/fixed_point/fixed_point_tests.cu
+++ b/cpp/tests/fixed_point/fixed_point_tests.cu
@@ -85,7 +85,7 @@ TEST_F(FixedPointTest, DecimalXXThrustOnDevice)
   std::vector<decimal32> vec1(1000, decimal32{1, scale_type{-2}});
   auto d_vec1 = cudf::detail::make_device_uvector_sync(vec1);
 
-  auto const sum = thrust::reduce(rmm::exec_policy(cudf::default_stream_value),
+  auto const sum = thrust::reduce(rmm::exec_policy(cudf::get_default_stream()),
                                   std::cbegin(d_vec1),
                                   std::cend(d_vec1),
                                   decimal32{0, scale_type{-2}});
@@ -101,7 +101,7 @@ TEST_F(FixedPointTest, DecimalXXThrustOnDevice)
   std::vector<int32_t> vec2(1000);
   std::iota(std::begin(vec2), std::end(vec2), 1);
 
-  auto const res1 = thrust::reduce(rmm::exec_policy(cudf::default_stream_value),
+  auto const res1 = thrust::reduce(rmm::exec_policy(cudf::get_default_stream()),
                                    std::cbegin(d_vec1),
                                    std::cend(d_vec1),
                                    decimal32{0, scale_type{-2}});
@@ -110,9 +110,9 @@ TEST_F(FixedPointTest, DecimalXXThrustOnDevice)
 
   EXPECT_EQ(static_cast<int32_t>(res1), res2);
 
-  rmm::device_uvector<int32_t> d_vec3(1000, cudf::default_stream_value);
+  rmm::device_uvector<int32_t> d_vec3(1000, cudf::get_default_stream());
 
-  thrust::transform(rmm::exec_policy(cudf::default_stream_value),
+  thrust::transform(rmm::exec_policy(cudf::get_default_stream()),
                     std::cbegin(d_vec1),
                     std::cend(d_vec1),
                     std::begin(d_vec3),
diff --git a/cpp/tests/groupby/lists_tests.cu b/cpp/tests/groupby/lists_tests.cu
index 45c6b8fe2e6..e4118318792 100644
--- a/cpp/tests/groupby/lists_tests.cu
+++ b/cpp/tests/groupby/lists_tests.cu
@@ -114,7 +114,7 @@ inline void test_hash_based_sum_agg(column_view const& keys,
   // resulting table: `t [num_rows, 2 * num_rows - 1]`
   auto combined_table = cudf::concatenate(std::vector{expected_kv, result_kv});
   auto preprocessed_t = cudf::experimental::row::hash::preprocessed_table::create(
-    combined_table->view(), cudf::default_stream_value);
+    combined_table->view(), cudf::get_default_stream());
   cudf::experimental::row::equality::self_comparator comparator(preprocessed_t);
 
   auto const null_keys_are_equal =
@@ -124,7 +124,7 @@ inline void test_hash_based_sum_agg(column_view const& keys,
 
   // For each row in expected table `t[0, num_rows)`, there must be a match
   // in the resulting table `t[num_rows, 2 * num_rows)`
-  EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::default_stream_value),
+  EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()),
                              thrust::make_counting_iterator<cudf::size_type>(0),
                              thrust::make_counting_iterator<cudf::size_type>(num_rows),
                              func));
diff --git a/cpp/tests/groupby/tdigest_tests.cu b/cpp/tests/groupby/tdigest_tests.cu
index 736a1096fd1..f9c907767f5 100644
--- a/cpp/tests/groupby/tdigest_tests.cu
+++ b/cpp/tests/groupby/tdigest_tests.cu
@@ -74,7 +74,7 @@ struct tdigest_groupby_simple_op {
     // make a simple set of matching keys.
     auto keys = cudf::make_fixed_width_column(
       data_type{type_id::INT32}, values.size(), mask_state::UNALLOCATED);
-    thrust::fill(rmm::exec_policy(cudf::default_stream_value),
+    thrust::fill(rmm::exec_policy(cudf::get_default_stream()),
                  keys->mutable_view().template begin<int>(),
                  keys->mutable_view().template end<int>(),
                  0);
@@ -100,7 +100,7 @@ struct tdigest_groupby_simple_merge_op {
     // make a simple set of matching keys.
     auto merge_keys = cudf::make_fixed_width_column(
       data_type{type_id::INT32}, merge_values.size(), mask_state::UNALLOCATED);
-    thrust::fill(rmm::exec_policy(cudf::default_stream_value),
+    thrust::fill(rmm::exec_policy(cudf::get_default_stream()),
                  merge_keys->mutable_view().template begin<int>(),
                  merge_keys->mutable_view().template end<int>(),
                  0);
@@ -272,7 +272,7 @@ TEST_F(TDigestMergeTest, Grouped)
     data_type{type_id::INT32}, values->size(), mask_state::UNALLOCATED);
   // 3 groups. 0-250000 in group 0.  250000-500000 in group 1 and 500000-750000 in group 1
   auto key_iter = cudf::detail::make_counting_transform_iterator(0, key_groups{});
-  thrust::copy(rmm::exec_policy(cudf::default_stream_value),
+  thrust::copy(rmm::exec_policy(cudf::get_default_stream()),
                key_iter,
                key_iter + keys->size(),
                keys->mutable_view().template begin<int>());
diff --git a/cpp/tests/hash_map/map_test.cu b/cpp/tests/hash_map/map_test.cu
index 84e64027c5d..2895d3323b8 100644
--- a/cpp/tests/hash_map/map_test.cu
+++ b/cpp/tests/hash_map/map_test.cu
@@ -56,13 +56,13 @@ struct InsertTest : public cudf::test::BaseFixture {
     // prevent overflow of small types
     const size_t input_size =
       std::min(static_cast<key_type>(size), std::numeric_limits<key_type>::max());
-    pairs.resize(input_size, cudf::default_stream_value);
+    pairs.resize(input_size, cudf::get_default_stream());
     map = std::move(map_type::create(compute_hash_table_size(size)));
-    cudf::default_stream_value.synchronize();
+    cudf::get_default_stream().synchronize();
   }
 
   const cudf::size_type size{10000};
-  rmm::device_uvector<pair_type> pairs{static_cast<std::size_t>(size), cudf::default_stream_value};
+  rmm::device_uvector<pair_type> pairs{static_cast<std::size_t>(size), cudf::get_default_stream()};
   std::unique_ptr<map_type, std::function<void(map_type*)>> map;
 };
 
@@ -140,18 +140,18 @@ TYPED_TEST(InsertTest, UniqueKeysUniqueValues)
 {
   using map_type  = typename TypeParam::map_type;
   using pair_type = typename TypeParam::pair_type;
-  thrust::tabulate(rmm::exec_policy(cudf::default_stream_value),
+  thrust::tabulate(rmm::exec_policy(cudf::get_default_stream()),
                    this->pairs.begin(),
                    this->pairs.end(),
                    unique_pair_generator<pair_type>{});
   // All pairs should be new inserts
-  EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::default_stream_value),
+  EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()),
                              this->pairs.begin(),
                              this->pairs.end(),
                              insert_pair<map_type, pair_type>{*this->map}));
 
   // All pairs should be present in the map
-  EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::default_stream_value),
+  EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()),
                              this->pairs.begin(),
                              this->pairs.end(),
                              find_pair<map_type, pair_type>{*this->map}));
@@ -161,23 +161,23 @@ TYPED_TEST(InsertTest, IdenticalKeysIdenticalValues)
 {
   using map_type  = typename TypeParam::map_type;
   using pair_type = typename TypeParam::pair_type;
-  thrust::tabulate(rmm::exec_policy(cudf::default_stream_value),
+  thrust::tabulate(rmm::exec_policy(cudf::get_default_stream()),
                    this->pairs.begin(),
                    this->pairs.end(),
                    identical_pair_generator<pair_type>{});
   // Insert a single pair
-  EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::default_stream_value),
+  EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()),
                              this->pairs.begin(),
                              this->pairs.begin() + 1,
                              insert_pair<map_type, pair_type>{*this->map}));
   // Identical inserts should all return false (no new insert)
-  EXPECT_FALSE(thrust::all_of(rmm::exec_policy(cudf::default_stream_value),
+  EXPECT_FALSE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()),
                               this->pairs.begin(),
                               this->pairs.end(),
                               insert_pair<map_type, pair_type>{*this->map}));
 
   // All pairs should be present in the map
-  EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::default_stream_value),
+  EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()),
                              this->pairs.begin(),
                              this->pairs.end(),
                              find_pair<map_type, pair_type>{*this->map}));
@@ -187,30 +187,30 @@ TYPED_TEST(InsertTest, IdenticalKeysUniqueValues)
 {
   using map_type  = typename TypeParam::map_type;
   using pair_type = typename TypeParam::pair_type;
-  thrust::tabulate(rmm::exec_policy(cudf::default_stream_value),
+  thrust::tabulate(rmm::exec_policy(cudf::get_default_stream()),
                    this->pairs.begin(),
                    this->pairs.end(),
                    identical_key_generator<pair_type>{});
 
   // Insert a single pair
-  EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::default_stream_value),
+  EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()),
                              this->pairs.begin(),
                              this->pairs.begin() + 1,
                              insert_pair<map_type, pair_type>{*this->map}));
 
   // Identical key inserts should all return false (no new insert)
-  EXPECT_FALSE(thrust::all_of(rmm::exec_policy(cudf::default_stream_value),
+  EXPECT_FALSE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()),
                               this->pairs.begin() + 1,
                               this->pairs.end(),
                               insert_pair<map_type, pair_type>{*this->map}));
 
   // Only first pair is present in map
-  EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::default_stream_value),
+  EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()),
                              this->pairs.begin(),
                              this->pairs.begin() + 1,
                              find_pair<map_type, pair_type>{*this->map}));
 
-  EXPECT_FALSE(thrust::all_of(rmm::exec_policy(cudf::default_stream_value),
+  EXPECT_FALSE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()),
                               this->pairs.begin() + 1,
                               this->pairs.end(),
                               find_pair<map_type, pair_type>{*this->map}));
diff --git a/cpp/tests/io/comp/decomp_test.cpp b/cpp/tests/io/comp/decomp_test.cpp
index c51a7854e25..51dfc467e00 100644
--- a/cpp/tests/io/comp/decomp_test.cpp
+++ b/cpp/tests/io/comp/decomp_test.cpp
@@ -46,7 +46,7 @@ struct DecompressTest : public cudf::test::BaseFixture {
                   const uint8_t* compressed,
                   size_t compressed_size)
   {
-    auto stream = cudf::default_stream_value;
+    auto stream = cudf::get_default_stream();
     rmm::device_buffer src{compressed, compressed_size, stream};
     rmm::device_uvector<uint8_t> dst{decompressed->size(), stream};
 
@@ -82,7 +82,7 @@ struct GzipDecompressTest : public DecompressTest<GzipDecompressTest> {
                          d_inf_out,
                          d_inf_stat,
                          cudf::io::gzip_header_included::YES,
-                         cudf::default_stream_value);
+                         cudf::get_default_stream());
   }
 };
 
@@ -94,7 +94,7 @@ struct SnappyDecompressTest : public DecompressTest<SnappyDecompressTest> {
                 device_span<device_span<uint8_t>> d_inf_out,
                 device_span<cudf::io::compression_result> d_inf_stat)
   {
-    cudf::io::gpu_unsnap(d_inf_in, d_inf_out, d_inf_stat, cudf::default_stream_value);
+    cudf::io::gpu_unsnap(d_inf_in, d_inf_out, d_inf_stat, cudf::get_default_stream());
   }
 };
 
@@ -107,14 +107,14 @@ struct BrotliDecompressTest : public DecompressTest<BrotliDecompressTest> {
                 device_span<cudf::io::compression_result> d_inf_stat)
   {
     rmm::device_buffer d_scratch{cudf::io::get_gpu_debrotli_scratch_size(1),
-                                 cudf::default_stream_value};
+                                 cudf::get_default_stream()};
 
     cudf::io::gpu_debrotli(d_inf_in,
                            d_inf_out,
                            d_inf_stat,
                            d_scratch.data(),
                            d_scratch.size(),
-                           cudf::default_stream_value);
+                           cudf::get_default_stream());
   }
 };
 
diff --git a/cpp/tests/io/json_tree.cpp b/cpp/tests/io/json_tree.cpp
index 6f7e28a2ca3..89156c821c3 100644
--- a/cpp/tests/io/json_tree.cpp
+++ b/cpp/tests/io/json_tree.cpp
@@ -132,7 +132,7 @@ void print_tree(tree_meta_t2 const& cpu_tree)
 }
 void print_tree(tree_meta_t const& d_gpu_tree)
 {
-  auto const cpu_tree = to_cpu_tree(d_gpu_tree, rmm::cuda_stream_default);
+  auto const cpu_tree = to_cpu_tree(d_gpu_tree, cudf::get_default_stream());
   print_tree(cpu_tree);
 }
 
@@ -161,7 +161,7 @@ bool compare_vector(std::vector<T> const& cpu_vec,
                     rmm::device_uvector<T> const& d_vec,
                     std::string const& name)
 {
-  auto gpu_vec = cudf::detail::make_std_vector_async(d_vec, cudf::default_stream_value);
+  auto gpu_vec = cudf::detail::make_std_vector_async(d_vec, cudf::get_default_stream());
   return compare_vector(cpu_vec, gpu_vec, name);
 }
 
@@ -173,7 +173,7 @@ void compare_trees(tree_meta_t2 const& cpu_tree, tree_meta_t const& d_gpu_tree,
   EXPECT_EQ(cpu_num_nodes, d_gpu_tree.node_levels.size());
   EXPECT_EQ(cpu_num_nodes, d_gpu_tree.node_range_begin.size());
   EXPECT_EQ(cpu_num_nodes, d_gpu_tree.node_range_end.size());
-  auto gpu_tree = to_cpu_tree(d_gpu_tree, cudf::default_stream_value);
+  auto gpu_tree = to_cpu_tree(d_gpu_tree, cudf::get_default_stream());
   bool mismatch = false;
 
 #define COMPARE_MEMBER(member)                                                       \
@@ -535,7 +535,7 @@ struct JsonTest : public cudf::test::BaseFixture {
 
 TEST_F(JsonTest, TreeRepresentation)
 {
-  auto const stream = cudf::default_stream_value;
+  auto const stream = cudf::get_default_stream();
 
   // Test input
   std::string const input = R"(  [{)"
@@ -632,7 +632,7 @@ TEST_F(JsonTest, TreeRepresentation)
 
 TEST_F(JsonTest, TreeRepresentation2)
 {
-  auto const stream = cudf::default_stream_value;
+  auto const stream = cudf::get_default_stream();
   // Test input: value end with comma, space, close-brace ", }"
   std::string const input =
     // 0         1         2         3         4         5         6         7         8         9
@@ -707,7 +707,7 @@ TEST_F(JsonTest, TreeRepresentation2)
 
 TEST_F(JsonTest, TreeRepresentation3)
 {
-  auto const stream = cudf::default_stream_value;
+  auto const stream = cudf::get_default_stream();
   // Test input: Json lines with same TreeRepresentation2 input
   std::string const input =
     R"(  {}
@@ -790,7 +790,7 @@ INSTANTIATE_TEST_SUITE_P(JsonLines,
 TEST_P(JsonTreeTraversalTest, CPUvsGPUTraversal)
 {
   auto [json_lines, input] = GetParam();
-  auto stream              = cudf::default_stream_value;
+  auto stream              = cudf::get_default_stream();
   cudf::io::json_reader_options options{};
   options.enable_lines(json_lines);
 
diff --git a/cpp/tests/io/json_type_cast_test.cu b/cpp/tests/io/json_type_cast_test.cu
index 43702f1f7e7..2170ce4a3e2 100644
--- a/cpp/tests/io/json_type_cast_test.cu
+++ b/cpp/tests/io/json_type_cast_test.cu
@@ -51,7 +51,7 @@ auto default_json_options()
 {
   auto parse_opts = cudf::io::parse_options{',', '\n', '\"', '.'};
 
-  auto const stream     = cudf::default_stream_value;
+  auto const stream     = cudf::get_default_stream();
   parse_opts.trie_true  = cudf::detail::create_serialized_trie({"true"}, stream);
   parse_opts.trie_false = cudf::detail::create_serialized_trie({"false"}, stream);
   parse_opts.trie_na    = cudf::detail::create_serialized_trie({"", "null"}, stream);
@@ -60,7 +60,7 @@ auto default_json_options()
 
 TEST_F(JSONTypeCastTest, String)
 {
-  auto const stream = cudf::default_stream_value;
+  auto const stream = cudf::get_default_stream();
   auto mr           = rmm::mr::get_current_device_resource();
   auto const type   = cudf::data_type{cudf::type_id::STRING};
 
@@ -70,7 +70,7 @@ TEST_F(JSONTypeCastTest, String)
 
   auto d_column = cudf::column_device_view::create(input);
   rmm::device_uvector<thrust::pair<const char*, cudf::size_type>> svs(d_column->size(), stream);
-  thrust::transform(rmm::exec_policy(cudf::default_stream_value),
+  thrust::transform(rmm::exec_policy(cudf::get_default_stream()),
                     d_column->pair_begin<cudf::string_view, false>(),
                     d_column->pair_end<cudf::string_view, false>(),
                     svs.begin(),
@@ -93,14 +93,14 @@ TEST_F(JSONTypeCastTest, String)
 
 TEST_F(JSONTypeCastTest, Int)
 {
-  auto const stream = cudf::default_stream_value;
+  auto const stream = cudf::get_default_stream();
   auto mr           = rmm::mr::get_current_device_resource();
   auto const type   = cudf::data_type{cudf::type_id::INT64};
 
   cudf::test::strings_column_wrapper data({"1", "null", "3", "true", "5", "false"});
   auto d_column = cudf::column_device_view::create(data);
   rmm::device_uvector<thrust::pair<const char*, cudf::size_type>> svs(d_column->size(), stream);
-  thrust::transform(rmm::exec_policy(cudf::default_stream_value),
+  thrust::transform(rmm::exec_policy(cudf::get_default_stream()),
                     d_column->pair_begin<cudf::string_view, false>(),
                     d_column->pair_end<cudf::string_view, false>(),
                     svs.begin(),
@@ -120,7 +120,7 @@ TEST_F(JSONTypeCastTest, Int)
 
 TEST_F(JSONTypeCastTest, StringEscapes)
 {
-  auto const stream = cudf::default_stream_value;
+  auto const stream = cudf::get_default_stream();
   auto mr           = rmm::mr::get_current_device_resource();
   auto const type   = cudf::data_type{cudf::type_id::STRING};
 
@@ -137,7 +137,7 @@ TEST_F(JSONTypeCastTest, StringEscapes)
   });
   auto d_column = cudf::column_device_view::create(data);
   rmm::device_uvector<thrust::pair<const char*, cudf::size_type>> svs(d_column->size(), stream);
-  thrust::transform(rmm::exec_policy(cudf::default_stream_value),
+  thrust::transform(rmm::exec_policy(cudf::get_default_stream()),
                     d_column->pair_begin<cudf::string_view, false>(),
                     d_column->pair_end<cudf::string_view, false>(),
                     svs.begin(),
diff --git a/cpp/tests/io/nested_json_test.cpp b/cpp/tests/io/nested_json_test.cpp
index 65926be495f..01a1f0647cc 100644
--- a/cpp/tests/io/nested_json_test.cpp
+++ b/cpp/tests/io/nested_json_test.cpp
@@ -139,7 +139,7 @@ TEST_F(JsonTest, StackContext)
   using StackSymbolT = char;
 
   // Prepare cuda stream for data transfers & kernels
-  auto const stream = cudf::default_stream_value;
+  auto const stream = cudf::get_default_stream();
 
   // Test input
   std::string const input = R"(  [{)"
@@ -200,7 +200,7 @@ TEST_F(JsonTest, StackContextUtf8)
   using StackSymbolT = char;
 
   // Prepare cuda stream for data transfers & kernels
-  auto const stream = cudf::default_stream_value;
+  auto const stream = cudf::get_default_stream();
 
   // Test input
   std::string const input = R"([{"a":{"year":1882,"author": "Bharathi"}, {"a":"filip ʒakotɛ"}}])";
@@ -251,7 +251,7 @@ TEST_F(JsonTest, TokenStream)
                             R"("price": 8.95)"
                             R"(}] )";
 
-  auto const stream = cudf::default_stream_value;
+  auto const stream = cudf::get_default_stream();
 
   // Default parsing options
   cudf::io::json_reader_options default_options{};
@@ -387,7 +387,7 @@ TEST_F(JsonTest, TokenStream2)
     R"([ {}, { "a": { "y" : 6, "z": [] }}, { "a" : { "x" : 8, "y": 9}, "b" : {"x": 10 , "z": 11)"
     "\n}}]";
 
-  auto const stream = cudf::default_stream_value;
+  auto const stream = cudf::get_default_stream();
 
   // Default parsing options
   cudf::io::json_reader_options default_options{};
@@ -462,7 +462,7 @@ TEST_P(JsonParserTest, ExtractColumn)
                                        : cuio_json::detail::host_parse_nested_json;
 
   // Prepare cuda stream for data transfers & kernels
-  auto const stream = cudf::default_stream_value;
+  auto const stream = cudf::get_default_stream();
   auto mr           = rmm::mr::get_current_device_resource();
 
   // Default parsing options
@@ -489,7 +489,7 @@ TEST_P(JsonParserTest, ExtractColumn)
 TEST_P(JsonParserTest, UTF_JSON)
 {
   // Prepare cuda stream for data transfers & kernels
-  auto const stream      = cudf::default_stream_value;
+  auto const stream      = cudf::get_default_stream();
   auto mr                = rmm::mr::get_current_device_resource();
   bool const is_full_gpu = GetParam();
   auto json_parser       = is_full_gpu ? cuio_json::detail::device_parse_nested_json
@@ -539,7 +539,7 @@ TEST_P(JsonParserTest, ExtractColumnWithQuotes)
                                        : cuio_json::detail::host_parse_nested_json;
 
   // Prepare cuda stream for data transfers & kernels
-  auto const stream = cudf::default_stream_value;
+  auto const stream = cudf::get_default_stream();
   auto mr           = rmm::mr::get_current_device_resource();
 
   // Default parsing options
@@ -572,7 +572,7 @@ TEST_P(JsonParserTest, ExpectFailMixStructAndList)
                                        : cuio_json::detail::host_parse_nested_json;
 
   // Prepare cuda stream for data transfers & kernels
-  auto const stream = cudf::default_stream_value;
+  auto const stream = cudf::get_default_stream();
   auto mr           = rmm::mr::get_current_device_resource();
 
   // Default parsing options
@@ -610,7 +610,7 @@ TEST_P(JsonParserTest, EmptyString)
                                        : cuio_json::detail::host_parse_nested_json;
 
   // Prepare cuda stream for data transfers & kernels
-  auto const stream = cudf::default_stream_value;
+  auto const stream = cudf::get_default_stream();
   auto mr           = rmm::mr::get_current_device_resource();
 
   // Default parsing options
@@ -624,3 +624,5 @@ TEST_P(JsonParserTest, EmptyString)
   auto const expected_col_count = 0;
   EXPECT_EQ(cudf_table.tbl->num_columns(), expected_col_count);
 }
+
+CUDF_TEST_PROGRAM_MAIN()
diff --git a/cpp/tests/io/text/data_chunk_source_test.cpp b/cpp/tests/io/text/data_chunk_source_test.cpp
index 2111d66a066..a3314c440a4 100644
--- a/cpp/tests/io/text/data_chunk_source_test.cpp
+++ b/cpp/tests/io/text/data_chunk_source_test.cpp
@@ -45,7 +45,7 @@ void test_source(const std::string& content, const cudf::io::text::data_chunk_so
   {
     // full contents
     auto reader      = source.create_reader();
-    auto const chunk = reader->get_next_chunk(content.size(), rmm::cuda_stream_default);
+    auto const chunk = reader->get_next_chunk(content.size(), cudf::get_default_stream());
     ASSERT_EQ(chunk->size(), content.size());
     ASSERT_EQ(chunk_to_host(*chunk), content);
   }
@@ -53,15 +53,15 @@ void test_source(const std::string& content, const cudf::io::text::data_chunk_so
     // skipping contents
     auto reader = source.create_reader();
     reader->skip_bytes(4);
-    auto const chunk = reader->get_next_chunk(content.size(), rmm::cuda_stream_default);
+    auto const chunk = reader->get_next_chunk(content.size(), cudf::get_default_stream());
     ASSERT_EQ(chunk->size(), content.size() - 4);
     ASSERT_EQ(chunk_to_host(*chunk), content.substr(4));
   }
   {
     // reading multiple chunks, starting with a small one
     auto reader       = source.create_reader();
-    auto const chunk1 = reader->get_next_chunk(5, rmm::cuda_stream_default);
-    auto const chunk2 = reader->get_next_chunk(content.size() - 5, rmm::cuda_stream_default);
+    auto const chunk1 = reader->get_next_chunk(5, cudf::get_default_stream());
+    auto const chunk2 = reader->get_next_chunk(content.size() - 5, cudf::get_default_stream());
     ASSERT_EQ(chunk1->size(), 5);
     ASSERT_EQ(chunk2->size(), content.size() - 5);
     ASSERT_EQ(chunk_to_host(*chunk1), content.substr(0, 5));
@@ -70,9 +70,9 @@ void test_source(const std::string& content, const cudf::io::text::data_chunk_so
   {
     // reading multiple chunks
     auto reader       = source.create_reader();
-    auto const chunk1 = reader->get_next_chunk(content.size() / 2, rmm::cuda_stream_default);
+    auto const chunk1 = reader->get_next_chunk(content.size() / 2, cudf::get_default_stream());
     auto const chunk2 =
-      reader->get_next_chunk(content.size() - content.size() / 2, rmm::cuda_stream_default);
+      reader->get_next_chunk(content.size() - content.size() / 2, cudf::get_default_stream());
     ASSERT_EQ(chunk1->size(), content.size() / 2);
     ASSERT_EQ(chunk2->size(), content.size() - content.size() / 2);
     ASSERT_EQ(chunk_to_host(*chunk1), content.substr(0, content.size() / 2));
@@ -81,17 +81,17 @@ void test_source(const std::string& content, const cudf::io::text::data_chunk_so
   {
     // reading too many bytes
     auto reader      = source.create_reader();
-    auto const chunk = reader->get_next_chunk(content.size() + 10, rmm::cuda_stream_default);
+    auto const chunk = reader->get_next_chunk(content.size() + 10, cudf::get_default_stream());
     ASSERT_EQ(chunk->size(), content.size());
     ASSERT_EQ(chunk_to_host(*chunk), content);
-    auto next_chunk = reader->get_next_chunk(1, rmm::cuda_stream_default);
+    auto next_chunk = reader->get_next_chunk(1, cudf::get_default_stream());
     ASSERT_EQ(next_chunk->size(), 0);
   }
   {
     // skipping past the end
     auto reader = source.create_reader();
     reader->skip_bytes(content.size() + 10);
-    auto const next_chunk = reader->get_next_chunk(1, rmm::cuda_stream_default);
+    auto const next_chunk = reader->get_next_chunk(1, cudf::get_default_stream());
     ASSERT_EQ(next_chunk->size(), 0);
   }
 }
diff --git a/cpp/tests/io/type_inference_test.cu b/cpp/tests/io/type_inference_test.cu
index 4d01ef95b85..cbaa06589cf 100644
--- a/cpp/tests/io/type_inference_test.cu
+++ b/cpp/tests/io/type_inference_test.cu
@@ -17,6 +17,7 @@
 #include <io/utilities/trie.cuh>
 #include <io/utilities/type_inference.cuh>
 
+#include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/scalar/scalar_factories.hpp>
 #include <cudf_test/base_fixture.hpp>
 
@@ -39,7 +40,7 @@ struct TypeInference : public cudf::test::BaseFixture {
 
 TEST_F(TypeInference, Basic)
 {
-  auto const stream = cudf::default_stream_value;
+  auto const stream = cudf::get_default_stream();
 
   auto options       = parse_options{',', '\n', '\"'};
   options.trie_true  = cudf::detail::create_serialized_trie({"true"}, stream);
@@ -52,11 +53,13 @@ TEST_F(TypeInference, Basic)
 
   auto const string_offset = std::vector<int32_t>{1, 4, 7};
   auto const string_length = std::vector<std::size_t>{2, 2, 1};
-  rmm::device_vector<int32_t> d_string_offset{string_offset};
-  rmm::device_vector<std::size_t> d_string_length{string_length};
+  auto const d_string_offset =
+    cudf::detail::make_device_uvector_async(string_offset, cudf::get_default_stream());
+  auto const d_string_length =
+    cudf::detail::make_device_uvector_async(string_length, cudf::get_default_stream());
 
   auto d_col_strings =
-    thrust::make_zip_iterator(make_tuple(d_string_offset.begin(), d_string_length.begin()));
+    thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin()));
 
   auto res_type =
     infer_data_type(options.json_view(),
@@ -70,7 +73,7 @@ TEST_F(TypeInference, Basic)
 
 TEST_F(TypeInference, Null)
 {
-  auto const stream = cudf::default_stream_value;
+  auto const stream = cudf::get_default_stream();
 
   auto options       = parse_options{',', '\n', '\"'};
   options.trie_true  = cudf::detail::create_serialized_trie({"true"}, stream);
@@ -83,11 +86,13 @@ TEST_F(TypeInference, Null)
 
   auto const string_offset = std::vector<int32_t>{1, 1, 4};
   auto const string_length = std::vector<std::size_t>{0, 2, 1};
-  rmm::device_vector<int32_t> d_string_offset{string_offset};
-  rmm::device_vector<std::size_t> d_string_length{string_length};
+  auto const d_string_offset =
+    cudf::detail::make_device_uvector_async(string_offset, cudf::get_default_stream());
+  auto const d_string_length =
+    cudf::detail::make_device_uvector_async(string_length, cudf::get_default_stream());
 
   auto d_col_strings =
-    thrust::make_zip_iterator(make_tuple(d_string_offset.begin(), d_string_length.begin()));
+    thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin()));
 
   auto res_type =
     infer_data_type(options.json_view(),
@@ -102,7 +107,7 @@ TEST_F(TypeInference, Null)
 
 TEST_F(TypeInference, AllNull)
 {
-  auto const stream = cudf::default_stream_value;
+  auto const stream = cudf::get_default_stream();
 
   auto options       = parse_options{',', '\n', '\"'};
   options.trie_true  = cudf::detail::create_serialized_trie({"true"}, stream);
@@ -115,11 +120,13 @@ TEST_F(TypeInference, AllNull)
 
   auto const string_offset = std::vector<int32_t>{1, 1, 1};
   auto const string_length = std::vector<std::size_t>{0, 0, 4};
-  rmm::device_vector<int32_t> d_string_offset{string_offset};
-  rmm::device_vector<std::size_t> d_string_length{string_length};
+  auto const d_string_offset =
+    cudf::detail::make_device_uvector_async(string_offset, cudf::get_default_stream());
+  auto const d_string_length =
+    cudf::detail::make_device_uvector_async(string_length, cudf::get_default_stream());
 
   auto d_col_strings =
-    thrust::make_zip_iterator(make_tuple(d_string_offset.begin(), d_string_length.begin()));
+    thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin()));
 
   auto res_type =
     infer_data_type(options.json_view(),
@@ -133,7 +140,7 @@ TEST_F(TypeInference, AllNull)
 
 TEST_F(TypeInference, String)
 {
-  auto const stream = cudf::default_stream_value;
+  auto const stream = cudf::get_default_stream();
 
   auto options       = parse_options{',', '\n', '\"'};
   options.trie_true  = cudf::detail::create_serialized_trie({"true"}, stream);
@@ -146,11 +153,13 @@ TEST_F(TypeInference, String)
 
   auto const string_offset = std::vector<int32_t>{1, 8, 12};
   auto const string_length = std::vector<std::size_t>{6, 3, 4};
-  rmm::device_vector<int32_t> d_string_offset{string_offset};
-  rmm::device_vector<std::size_t> d_string_length{string_length};
+  auto const d_string_offset =
+    cudf::detail::make_device_uvector_async(string_offset, cudf::get_default_stream());
+  auto const d_string_length =
+    cudf::detail::make_device_uvector_async(string_length, cudf::get_default_stream());
 
   auto d_col_strings =
-    thrust::make_zip_iterator(make_tuple(d_string_offset.begin(), d_string_length.begin()));
+    thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin()));
 
   auto res_type =
     infer_data_type(options.json_view(),
@@ -164,7 +173,7 @@ TEST_F(TypeInference, String)
 
 TEST_F(TypeInference, Bool)
 {
-  auto const stream = cudf::default_stream_value;
+  auto const stream = cudf::get_default_stream();
 
   auto options       = parse_options{',', '\n', '\"'};
   options.trie_true  = cudf::detail::create_serialized_trie({"true"}, stream);
@@ -177,11 +186,13 @@ TEST_F(TypeInference, Bool)
 
   auto const string_offset = std::vector<int32_t>{1, 6, 12};
   auto const string_length = std::vector<std::size_t>{4, 5, 5};
-  rmm::device_vector<int32_t> d_string_offset{string_offset};
-  rmm::device_vector<std::size_t> d_string_length{string_length};
+  auto const d_string_offset =
+    cudf::detail::make_device_uvector_async(string_offset, cudf::get_default_stream());
+  auto const d_string_length =
+    cudf::detail::make_device_uvector_async(string_length, cudf::get_default_stream());
 
   auto d_col_strings =
-    thrust::make_zip_iterator(make_tuple(d_string_offset.begin(), d_string_length.begin()));
+    thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin()));
 
   auto res_type =
     infer_data_type(options.json_view(),
@@ -195,7 +206,7 @@ TEST_F(TypeInference, Bool)
 
 TEST_F(TypeInference, Timestamp)
 {
-  auto const stream = cudf::default_stream_value;
+  auto const stream = cudf::get_default_stream();
 
   auto options       = parse_options{',', '\n', '\"'};
   options.trie_true  = cudf::detail::create_serialized_trie({"true"}, stream);
@@ -208,11 +219,13 @@ TEST_F(TypeInference, Timestamp)
 
   auto const string_offset = std::vector<int32_t>{1, 10};
   auto const string_length = std::vector<std::size_t>{8, 9};
-  rmm::device_vector<int32_t> d_string_offset{string_offset};
-  rmm::device_vector<std::size_t> d_string_length{string_length};
+  auto const d_string_offset =
+    cudf::detail::make_device_uvector_async(string_offset, cudf::get_default_stream());
+  auto const d_string_length =
+    cudf::detail::make_device_uvector_async(string_length, cudf::get_default_stream());
 
   auto d_col_strings =
-    thrust::make_zip_iterator(make_tuple(d_string_offset.begin(), d_string_length.begin()));
+    thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin()));
 
   auto res_type =
     infer_data_type(options.json_view(),
@@ -227,7 +240,7 @@ TEST_F(TypeInference, Timestamp)
 
 TEST_F(TypeInference, InvalidInput)
 {
-  auto const stream = cudf::default_stream_value;
+  auto const stream = cudf::get_default_stream();
 
   auto options       = parse_options{',', '\n', '\"'};
   options.trie_true  = cudf::detail::create_serialized_trie({"true"}, stream);
@@ -240,11 +253,13 @@ TEST_F(TypeInference, InvalidInput)
 
   auto const string_offset = std::vector<int32_t>{1, 3, 5, 7, 9};
   auto const string_length = std::vector<std::size_t>{1, 1, 1, 1, 1};
-  rmm::device_vector<int32_t> d_string_offset{string_offset};
-  rmm::device_vector<std::size_t> d_string_length{string_length};
+  auto const d_string_offset =
+    cudf::detail::make_device_uvector_async(string_offset, cudf::get_default_stream());
+  auto const d_string_length =
+    cudf::detail::make_device_uvector_async(string_length, cudf::get_default_stream());
 
   auto d_col_strings =
-    thrust::make_zip_iterator(make_tuple(d_string_offset.begin(), d_string_length.begin()));
+    thrust::make_zip_iterator(thrust::make_tuple(d_string_offset.begin(), d_string_length.begin()));
 
   auto res_type =
     infer_data_type(options.json_view(),
@@ -256,3 +271,5 @@ TEST_F(TypeInference, InvalidInput)
   // Invalid input is inferred as string for now
   EXPECT_EQ(res_type, cudf::data_type{cudf::type_id::STRING});
 }
+
+CUDF_TEST_PROGRAM_MAIN()
diff --git a/cpp/tests/iterator/iterator_tests.cuh b/cpp/tests/iterator/iterator_tests.cuh
index 26902b43662..7eb2c3d70bb 100644
--- a/cpp/tests/iterator/iterator_tests.cuh
+++ b/cpp/tests/iterator/iterator_tests.cuh
@@ -50,7 +50,7 @@ struct IteratorTest : public cudf::test::BaseFixture {
   void iterator_test_cub(T_output expected, InputIterator d_in, int num_items)
   {
     T_output init = cudf::test::make_type_param_scalar<T_output>(0);
-    rmm::device_uvector<T_output> dev_result(1, cudf::default_stream_value);
+    rmm::device_uvector<T_output> dev_result(1, cudf::get_default_stream());
 
     // Get temporary storage size
     size_t temp_storage_bytes = 0;
@@ -61,10 +61,10 @@ struct IteratorTest : public cudf::test::BaseFixture {
                               num_items,
                               thrust::minimum{},
                               init,
-                              cudf::default_stream_value.value());
+                              cudf::get_default_stream().value());
 
     // Allocate temporary storage
-    rmm::device_buffer d_temp_storage(temp_storage_bytes, cudf::default_stream_value);
+    rmm::device_buffer d_temp_storage(temp_storage_bytes, cudf::get_default_stream());
 
     // Run reduction
     cub::DeviceReduce::Reduce(d_temp_storage.data(),
@@ -74,7 +74,7 @@ struct IteratorTest : public cudf::test::BaseFixture {
                               num_items,
                               thrust::minimum{},
                               init,
-                              cudf::default_stream_value.value());
+                              cudf::get_default_stream().value());
 
     evaluate(expected, dev_result, "cub test");
   }
@@ -91,14 +91,14 @@ struct IteratorTest : public cudf::test::BaseFixture {
 
     // using a temporary vector and calling transform and all_of separately is
     // equivalent to thrust::equal but compiles ~3x faster
-    auto dev_results = rmm::device_uvector<bool>(num_items, cudf::default_stream_value);
-    thrust::transform(rmm::exec_policy(cudf::default_stream_value),
+    auto dev_results = rmm::device_uvector<bool>(num_items, cudf::get_default_stream());
+    thrust::transform(rmm::exec_policy(cudf::get_default_stream()),
                       d_in,
                       d_in_last,
                       dev_expected.begin(),
                       dev_results.begin(),
                       thrust::equal_to{});
-    auto result = thrust::all_of(rmm::exec_policy(cudf::default_stream_value),
+    auto result = thrust::all_of(rmm::exec_policy(cudf::get_default_stream()),
                                  dev_results.begin(),
                                  dev_results.end(),
                                  thrust::identity<bool>{});
diff --git a/cpp/tests/iterator/optional_iterator_test_numeric.cu b/cpp/tests/iterator/optional_iterator_test_numeric.cu
index c5b7393550a..586c9472185 100644
--- a/cpp/tests/iterator/optional_iterator_test_numeric.cu
+++ b/cpp/tests/iterator/optional_iterator_test_numeric.cu
@@ -111,14 +111,14 @@ TYPED_TEST(NumericOptionalIteratorTest, mean_var_output)
 
   // this can be computed with a single reduce and without a temporary output vector
   // but the approach increases the compile time by ~2x
-  auto results = rmm::device_uvector<T_output>(d_col->size(), cudf::default_stream_value);
-  thrust::transform(rmm::exec_policy(cudf::default_stream_value),
+  auto results = rmm::device_uvector<T_output>(d_col->size(), cudf::get_default_stream());
+  thrust::transform(rmm::exec_policy(cudf::get_default_stream()),
                     it_dev_squared,
                     it_dev_squared + d_col->size(),
                     results.begin(),
                     optional_to_meanvar<T_output>{});
   auto result = thrust::reduce(
-    rmm::exec_policy(cudf::default_stream_value), results.begin(), results.end(), T_output{});
+    rmm::exec_policy(cudf::get_default_stream()), results.begin(), results.end(), T_output{});
 
   if (not std::is_floating_point<T>()) {
     EXPECT_EQ(expected_value, result) << "optional iterator reduction sum";
diff --git a/cpp/tests/iterator/pair_iterator_test_numeric.cu b/cpp/tests/iterator/pair_iterator_test_numeric.cu
index f570df44286..99ec3118b4b 100644
--- a/cpp/tests/iterator/pair_iterator_test_numeric.cu
+++ b/cpp/tests/iterator/pair_iterator_test_numeric.cu
@@ -113,7 +113,7 @@ TYPED_TEST(NumericPairIteratorTest, mean_var_output)
   // GPU test
   auto it_dev         = d_col->pair_begin<T, true>();
   auto it_dev_squared = thrust::make_transform_iterator(it_dev, transformer);
-  auto result         = thrust::reduce(rmm::exec_policy(cudf::default_stream_value),
+  auto result         = thrust::reduce(rmm::exec_policy(cudf::get_default_stream()),
                                it_dev_squared,
                                it_dev_squared + d_col->size(),
                                thrust::make_pair(T_output{}, true),
diff --git a/cpp/tests/join/conditional_join_tests.cu b/cpp/tests/join/conditional_join_tests.cu
index f8dfc972191..920c497f850 100644
--- a/cpp/tests/join/conditional_join_tests.cu
+++ b/cpp/tests/join/conditional_join_tests.cu
@@ -230,8 +230,8 @@ struct ConditionalJoinPairReturnTest : public ConditionalJoinTest<T> {
       // Note: Not trying to be terribly efficient here since these tests are
       // small, otherwise a batch copy to host before constructing the tuples
       // would be important.
-      result_pairs.push_back({result.first->element(i, cudf::default_stream_value),
-                              result.second->element(i, cudf::default_stream_value)});
+      result_pairs.push_back({result.first->element(i, cudf::get_default_stream()),
+                              result.second->element(i, cudf::get_default_stream())});
     }
     std::sort(result_pairs.begin(), result_pairs.end());
     std::sort(expected_outputs.begin(), expected_outputs.end());
@@ -276,11 +276,11 @@ struct ConditionalJoinPairReturnTest : public ConditionalJoinTest<T> {
   void _compare_to_hash_join(PairJoinReturn const& result, PairJoinReturn const& reference)
   {
     auto result_pairs =
-      rmm::device_uvector<index_pair>(result.first->size(), cudf::default_stream_value);
+      rmm::device_uvector<index_pair>(result.first->size(), cudf::get_default_stream());
     auto reference_pairs =
-      rmm::device_uvector<index_pair>(reference.first->size(), cudf::default_stream_value);
+      rmm::device_uvector<index_pair>(reference.first->size(), cudf::get_default_stream());
 
-    thrust::transform(rmm::exec_policy(cudf::default_stream_value),
+    thrust::transform(rmm::exec_policy(cudf::get_default_stream()),
                       result.first->begin(),
                       result.first->end(),
                       result.second->begin(),
@@ -288,7 +288,7 @@ struct ConditionalJoinPairReturnTest : public ConditionalJoinTest<T> {
                       [] __device__(cudf::size_type first, cudf::size_type second) {
                         return index_pair{first, second};
                       });
-    thrust::transform(rmm::exec_policy(cudf::default_stream_value),
+    thrust::transform(rmm::exec_policy(cudf::get_default_stream()),
                       reference.first->begin(),
                       reference.first->end(),
                       reference.second->begin(),
@@ -298,11 +298,11 @@ struct ConditionalJoinPairReturnTest : public ConditionalJoinTest<T> {
                       });
 
     thrust::sort(
-      rmm::exec_policy(cudf::default_stream_value), result_pairs.begin(), result_pairs.end());
+      rmm::exec_policy(cudf::get_default_stream()), result_pairs.begin(), result_pairs.end());
     thrust::sort(
-      rmm::exec_policy(cudf::default_stream_value), reference_pairs.begin(), reference_pairs.end());
+      rmm::exec_policy(cudf::get_default_stream()), reference_pairs.begin(), reference_pairs.end());
 
-    EXPECT_TRUE(thrust::equal(rmm::exec_policy(cudf::default_stream_value),
+    EXPECT_TRUE(thrust::equal(rmm::exec_policy(cudf::get_default_stream()),
                               reference_pairs.begin(),
                               reference_pairs.end(),
                               result_pairs.begin()));
@@ -713,7 +713,7 @@ struct ConditionalJoinSingleReturnTest : public ConditionalJoinTest<T> {
       // Note: Not trying to be terribly efficient here since these tests are
       // small, otherwise a batch copy to host before constructing the tuples
       // would be important.
-      resulting_indices.push_back(result->element(i, cudf::default_stream_value));
+      resulting_indices.push_back(result->element(i, cudf::get_default_stream()));
     }
     std::sort(resulting_indices.begin(), resulting_indices.end());
     std::sort(expected_outputs.begin(), expected_outputs.end());
@@ -724,10 +724,10 @@ struct ConditionalJoinSingleReturnTest : public ConditionalJoinTest<T> {
   void _compare_to_hash_join(std::unique_ptr<rmm::device_uvector<cudf::size_type>> const& result,
                              std::unique_ptr<rmm::device_uvector<cudf::size_type>> const& reference)
   {
-    thrust::sort(rmm::exec_policy(cudf::default_stream_value), result->begin(), result->end());
+    thrust::sort(rmm::exec_policy(cudf::get_default_stream()), result->begin(), result->end());
     thrust::sort(
-      rmm::exec_policy(cudf::default_stream_value), reference->begin(), reference->end());
-    EXPECT_TRUE(thrust::equal(rmm::exec_policy(cudf::default_stream_value),
+      rmm::exec_policy(cudf::get_default_stream()), reference->begin(), reference->end());
+    EXPECT_TRUE(thrust::equal(rmm::exec_policy(cudf::get_default_stream()),
                               result->begin(),
                               result->end(),
                               reference->begin()));
diff --git a/cpp/tests/join/join_tests.cpp b/cpp/tests/join/join_tests.cpp
index 44e1d586389..fb2eb77512c 100644
--- a/cpp/tests/join/join_tests.cpp
+++ b/cpp/tests/join/join_tests.cpp
@@ -1499,9 +1499,9 @@ TEST_F(JoinTest, HashJoinLargeOutputSize)
 {
   // self-join a table of zeroes to generate an output row count that would overflow int32_t
   std::size_t col_size = 65567;
-  rmm::device_buffer zeroes(col_size * sizeof(int32_t), cudf::default_stream_value);
+  rmm::device_buffer zeroes(col_size * sizeof(int32_t), cudf::get_default_stream());
   CUDF_CUDA_TRY(
-    cudaMemsetAsync(zeroes.data(), 0, zeroes.size(), cudf::default_stream_value.value()));
+    cudaMemsetAsync(zeroes.data(), 0, zeroes.size(), cudf::get_default_stream().value()));
   cudf::column_view col_zeros(cudf::data_type{cudf::type_id::INT32}, col_size, zeroes.data());
   cudf::table_view tview{{col_zeros}};
   cudf::hash_join hash_join(tview, cudf::null_equality::UNEQUAL);
diff --git a/cpp/tests/join/mixed_join_tests.cu b/cpp/tests/join/mixed_join_tests.cu
index dbff5a1d8fc..d252ded6627 100644
--- a/cpp/tests/join/mixed_join_tests.cu
+++ b/cpp/tests/join/mixed_join_tests.cu
@@ -228,8 +228,8 @@ struct MixedJoinPairReturnTest : public MixedJoinTest<T> {
       // Note: Not trying to be terribly efficient here since these tests are
       // small, otherwise a batch copy to host before constructing the tuples
       // would be important.
-      result_pairs.push_back({result.first->element(i, cudf::default_stream_value),
-                              result.second->element(i, cudf::default_stream_value)});
+      result_pairs.push_back({result.first->element(i, cudf::get_default_stream()),
+                              result.second->element(i, cudf::get_default_stream())});
     }
     std::sort(result_pairs.begin(), result_pairs.end());
     std::sort(expected_outputs.begin(), expected_outputs.end());
@@ -586,8 +586,8 @@ struct MixedFullJoinTest : public MixedJoinPairReturnTest<T> {
       left_equality, right_equality, left_conditional, right_conditional, predicate, compare_nulls);
     std::vector<std::pair<cudf::size_type, cudf::size_type>> result_pairs;
     for (size_t i = 0; i < result.first->size(); ++i) {
-      result_pairs.push_back({result.first->element(i, cudf::default_stream_value),
-                              result.second->element(i, cudf::default_stream_value)});
+      result_pairs.push_back({result.first->element(i, cudf::get_default_stream()),
+                              result.second->element(i, cudf::get_default_stream())});
     }
     std::sort(result_pairs.begin(), result_pairs.end());
     std::sort(expected_outputs.begin(), expected_outputs.end());
@@ -666,7 +666,7 @@ struct MixedJoinSingleReturnTest : public MixedJoinTest<T> {
       // Note: Not trying to be terribly efficient here since these tests are
       // small, otherwise a batch copy to host before constructing the tuples
       // would be important.
-      resulting_indices.push_back(result->element(i, cudf::default_stream_value));
+      resulting_indices.push_back(result->element(i, cudf::get_default_stream()));
     }
     std::sort(resulting_indices.begin(), resulting_indices.end());
     std::sort(expected_outputs.begin(), expected_outputs.end());
diff --git a/cpp/tests/quantiles/percentile_approx_test.cu b/cpp/tests/quantiles/percentile_approx_test.cu
index 0ca63526c51..82151caea53 100644
--- a/cpp/tests/quantiles/percentile_approx_test.cu
+++ b/cpp/tests/quantiles/percentile_approx_test.cu
@@ -234,7 +234,7 @@ void simple_test(data_type input_type, std::vector<std::pair<int, int>> params)
   // all in the same group
   auto keys = cudf::make_fixed_width_column(
     data_type{type_id::INT32}, values->size(), mask_state::UNALLOCATED);
-  thrust::fill(rmm::exec_policy(cudf::default_stream_value),
+  thrust::fill(rmm::exec_policy(cudf::get_default_stream()),
                keys->mutable_view().template begin<int>(),
                keys->mutable_view().template end<int>(),
                0);
@@ -257,7 +257,7 @@ void grouped_test(data_type input_type, std::vector<std::pair<int, int>> params)
   auto keys = cudf::make_fixed_width_column(
     data_type{type_id::INT32}, values->size(), mask_state::UNALLOCATED);
   auto i = thrust::make_counting_iterator(0);
-  thrust::transform(rmm::exec_policy(cudf::default_stream_value),
+  thrust::transform(rmm::exec_policy(cudf::get_default_stream()),
                     i,
                     i + values->size(),
                     keys->mutable_view().template begin<int>(),
@@ -282,7 +282,7 @@ void simple_with_nulls_test(data_type input_type, std::vector<std::pair<int, int
   // all in the same group
   auto keys = cudf::make_fixed_width_column(
     data_type{type_id::INT32}, values->size(), mask_state::UNALLOCATED);
-  thrust::fill(rmm::exec_policy(cudf::default_stream_value),
+  thrust::fill(rmm::exec_policy(cudf::get_default_stream()),
                keys->mutable_view().template begin<int>(),
                keys->mutable_view().template end<int>(),
                0);
@@ -304,7 +304,7 @@ void grouped_with_nulls_test(data_type input_type, std::vector<std::pair<int, in
   auto keys = cudf::make_fixed_width_column(
     data_type{type_id::INT32}, values->size(), mask_state::UNALLOCATED);
   auto i = thrust::make_counting_iterator(0);
-  thrust::transform(rmm::exec_policy(cudf::default_stream_value),
+  thrust::transform(rmm::exec_policy(cudf::get_default_stream()),
                     i,
                     i + values->size(),
                     keys->mutable_view().template begin<int>(),
diff --git a/cpp/tests/quantiles/tdigest_utilities.cu b/cpp/tests/quantiles/tdigest_utilities.cu
index 3cf2f2eb4ef..68147dc29eb 100644
--- a/cpp/tests/quantiles/tdigest_utilities.cu
+++ b/cpp/tests/quantiles/tdigest_utilities.cu
@@ -65,15 +65,15 @@ void tdigest_sample_compare(cudf::tdigest::tdigest_column_view const& tdv,
   }
 
   auto d_expected_src =
-    cudf::detail::make_device_uvector_async(h_expected_src, cudf::default_stream_value);
+    cudf::detail::make_device_uvector_async(h_expected_src, cudf::get_default_stream());
   auto d_expected_mean =
-    cudf::detail::make_device_uvector_async(h_expected_mean, cudf::default_stream_value);
+    cudf::detail::make_device_uvector_async(h_expected_mean, cudf::get_default_stream());
   auto d_expected_weight =
-    cudf::detail::make_device_uvector_async(h_expected_weight, cudf::default_stream_value);
+    cudf::detail::make_device_uvector_async(h_expected_weight, cudf::get_default_stream());
 
   auto iter = thrust::make_counting_iterator(0);
   thrust::for_each(
-    rmm::exec_policy(cudf::default_stream_value),
+    rmm::exec_policy(cudf::get_default_stream()),
     iter,
     iter + h_expected.size(),
     [expected_src_in     = d_expected_src.data(),
@@ -122,13 +122,13 @@ std::unique_ptr<column> make_expected_tdigest_column(std::vector<expected_tdiges
 
     auto min_col =
       cudf::make_fixed_width_column(data_type{type_id::FLOAT64}, 1, mask_state::UNALLOCATED);
-    thrust::fill(rmm::exec_policy(cudf::default_stream_value),
+    thrust::fill(rmm::exec_policy(cudf::get_default_stream()),
                  min_col->mutable_view().begin<double>(),
                  min_col->mutable_view().end<double>(),
                  tdigest.min);
     auto max_col =
       cudf::make_fixed_width_column(data_type{type_id::FLOAT64}, 1, mask_state::UNALLOCATED);
-    thrust::fill(rmm::exec_policy(cudf::default_stream_value),
+    thrust::fill(rmm::exec_policy(cudf::get_default_stream()),
                  max_col->mutable_view().begin<double>(),
                  max_col->mutable_view().end<double>(),
                  tdigest.max);
diff --git a/cpp/tests/reductions/segmented_reduction_tests.cpp b/cpp/tests/reductions/segmented_reduction_tests.cpp
index a8547ea982d..c0c4f580393 100644
--- a/cpp/tests/reductions/segmented_reduction_tests.cpp
+++ b/cpp/tests/reductions/segmented_reduction_tests.cpp
@@ -19,6 +19,7 @@
 #include <cudf_test/type_lists.hpp>
 
 #include <cudf/aggregation.hpp>
+#include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/reduction.hpp>
 #include <cudf/scalar/scalar_factories.hpp>
 #include <cudf/types.hpp>
@@ -52,7 +53,8 @@ TYPED_TEST(SegmentedReductionTest, SumExcludeNulls)
   auto const input   = fixed_width_column_wrapper<TypeParam>{{1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX},
                                                            {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}};
   auto const offsets = std::vector<size_type>{0, 3, 6, 7, 8, 10, 10};
-  auto const d_offsets = thrust::device_vector<size_type>(offsets);
+  auto const d_offsets =
+    cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream());
   auto const expect =
     fixed_width_column_wrapper<TypeParam>{{6, 4, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}};
 
@@ -98,7 +100,8 @@ TYPED_TEST(SegmentedReductionTest, ProductExcludeNulls)
   auto const input   = fixed_width_column_wrapper<TypeParam>{{1, 3, 5, XXX, 3, 5, 1, XXX, XXX, XXX},
                                                            {1, 1, 1, 0, 1, 1, 1, 0, 0, 0}};
   auto const offsets = std::vector<size_type>{0, 3, 6, 7, 8, 10, 10};
-  auto const d_offsets = thrust::device_vector<size_type>(offsets);
+  auto const d_offsets =
+    cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream());
   auto const expect =
     fixed_width_column_wrapper<TypeParam>{{15, 15, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}};
 
@@ -144,7 +147,8 @@ TYPED_TEST(SegmentedReductionTest, MaxExcludeNulls)
   auto const input   = fixed_width_column_wrapper<TypeParam>{{1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX},
                                                            {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}};
   auto const offsets = std::vector<size_type>{0, 3, 6, 7, 8, 10, 10};
-  auto const d_offsets = thrust::device_vector<size_type>(offsets);
+  auto const d_offsets =
+    cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream());
   auto const expect =
     fixed_width_column_wrapper<TypeParam>{{3, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}};
 
@@ -190,7 +194,8 @@ TYPED_TEST(SegmentedReductionTest, MinExcludeNulls)
   auto const input   = fixed_width_column_wrapper<TypeParam>{{1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX},
                                                            {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}};
   auto const offsets = std::vector<size_type>{0, 3, 6, 7, 8, 10, 10};
-  auto const d_offsets = thrust::device_vector<size_type>(offsets);
+  auto const d_offsets =
+    cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream());
   auto const expect =
     fixed_width_column_wrapper<TypeParam>{{1, 1, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}};
 
@@ -236,9 +241,10 @@ TYPED_TEST(SegmentedReductionTest, AnyExcludeNulls)
   auto const input = fixed_width_column_wrapper<TypeParam>{
     {0, 0, 0, 0, XXX, 0, 0, 1, 0, 1, XXX, 0, 0, 1, XXX, XXX, XXX},
     {1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0}};
-  auto const offsets   = std::vector<size_type>{0, 3, 6, 9, 12, 12, 13, 14, 15, 17};
-  auto const d_offsets = thrust::device_vector<size_type>(offsets);
-  auto const expect    = fixed_width_column_wrapper<bool>{
+  auto const offsets = std::vector<size_type>{0, 3, 6, 9, 12, 12, 13, 14, 15, 17};
+  auto const d_offsets =
+    cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream());
+  auto const expect = fixed_width_column_wrapper<bool>{
     {false, false, true, true, bool{XXX}, false, true, bool{XXX}, bool{XXX}},
     {true, true, true, true, false, true, true, false, false}};
 
@@ -285,9 +291,10 @@ TYPED_TEST(SegmentedReductionTest, AllExcludeNulls)
   auto const input = fixed_width_column_wrapper<TypeParam>{
     {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX, 1, 0, 3, 1, XXX, 0, 0},
     {1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1}};
-  auto const offsets   = std::vector<size_type>{0, 3, 6, 6, 7, 8, 10, 13, 16, 17};
-  auto const d_offsets = thrust::device_vector<size_type>(offsets);
-  auto const expect    = fixed_width_column_wrapper<bool>{
+  auto const offsets = std::vector<size_type>{0, 3, 6, 6, 7, 8, 10, 13, 16, 17};
+  auto const d_offsets =
+    cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream());
+  auto const expect = fixed_width_column_wrapper<bool>{
     {true, true, bool{XXX}, true, bool{XXX}, bool{XXX}, false, false, false},
     {true, true, false, true, false, false, true, true, true}};
 
@@ -335,7 +342,8 @@ TYPED_TEST(SegmentedReductionTest, SumIncludeNulls)
   auto const input   = fixed_width_column_wrapper<TypeParam>{{1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX},
                                                            {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}};
   auto const offsets = std::vector<size_type>{0, 3, 6, 7, 8, 10, 10};
-  auto const d_offsets = thrust::device_vector<size_type>(offsets);
+  auto const d_offsets =
+    cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream());
   auto const expect =
     fixed_width_column_wrapper<TypeParam>{{6, XXX, 1, XXX, XXX, XXX}, {1, 0, 1, 0, 0, 0}};
 
@@ -384,7 +392,8 @@ TYPED_TEST(SegmentedReductionTest, ProductIncludeNulls)
   auto const input   = fixed_width_column_wrapper<TypeParam>{{1, 3, 5, XXX, 3, 5, 1, XXX, XXX, XXX},
                                                            {1, 1, 1, 0, 1, 1, 1, 0, 0, 0}};
   auto const offsets = std::vector<size_type>{0, 3, 6, 7, 8, 10, 10};
-  auto const d_offsets = thrust::device_vector<size_type>(offsets);
+  auto const d_offsets =
+    cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream());
   auto const expect =
     fixed_width_column_wrapper<TypeParam>{{15, XXX, 1, XXX, XXX, XXX}, {1, 0, 1, 0, 0, 0}};
 
@@ -433,7 +442,8 @@ TYPED_TEST(SegmentedReductionTest, MaxIncludeNulls)
   auto const input   = fixed_width_column_wrapper<TypeParam>{{1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX},
                                                            {1, 1, 1, 1, 0, 1, 1, 0, 0, 0}};
   auto const offsets = std::vector<size_type>{0, 3, 6, 7, 8, 10, 10};
-  auto const d_offsets = thrust::device_vector<size_type>(offsets);
+  auto const d_offsets =
+    cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream());
   auto const expect =
     fixed_width_column_wrapper<TypeParam>{{3, XXX, 1, XXX, XXX, XXX}, {1, 0, 1, 0, 0, 0}};
 
@@ -482,7 +492,8 @@ TYPED_TEST(SegmentedReductionTest, MinIncludeNulls)
   auto const input   = fixed_width_column_wrapper<TypeParam>{{1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX},
                                                            {1, 1, 1, 1, 0, 1, 1, 0, 0}};
   auto const offsets = std::vector<size_type>{0, 3, 6, 7, 8, 10, 10};
-  auto const d_offsets = thrust::device_vector<size_type>(offsets);
+  auto const d_offsets =
+    cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream());
   auto const expect =
     fixed_width_column_wrapper<TypeParam>{{1, XXX, 1, XXX, XXX, XXX}, {1, 0, 1, 0, 0, 0}};
 
@@ -531,9 +542,10 @@ TYPED_TEST(SegmentedReductionTest, AnyIncludeNulls)
   auto const input = fixed_width_column_wrapper<TypeParam>{
     {0, 0, 0, 0, XXX, 0, 0, 1, 0, 1, XXX, 0, 0, 1, XXX, XXX, XXX},
     {1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0}};
-  auto const offsets   = std::vector<size_type>{0, 3, 6, 9, 12, 12, 13, 14, 15, 17};
-  auto const d_offsets = thrust::device_vector<size_type>(offsets);
-  auto const expect    = fixed_width_column_wrapper<bool>{
+  auto const offsets = std::vector<size_type>{0, 3, 6, 9, 12, 12, 13, 14, 15, 17};
+  auto const d_offsets =
+    cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream());
+  auto const expect = fixed_width_column_wrapper<bool>{
     {false, bool{XXX}, true, bool{XXX}, bool{XXX}, false, true, bool{XXX}, bool{XXX}},
     {true, false, true, false, false, true, true, false, false}};
 
@@ -592,9 +604,10 @@ TYPED_TEST(SegmentedReductionTest, AllIncludeNulls)
   auto const input = fixed_width_column_wrapper<TypeParam>{
     {1, 2, 3, 1, XXX, 3, 1, XXX, XXX, XXX, 1, 0, 3, 1, XXX, 0, 0},
     {1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1}};
-  auto const offsets   = std::vector<size_type>{0, 3, 6, 6, 7, 8, 10, 13, 16, 17};
-  auto const d_offsets = thrust::device_vector<size_type>(offsets);
-  auto const expect    = fixed_width_column_wrapper<bool>{
+  auto const offsets = std::vector<size_type>{0, 3, 6, 6, 7, 8, 10, 13, 16, 17};
+  auto const d_offsets =
+    cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream());
+  auto const expect = fixed_width_column_wrapper<bool>{
     {true, bool{XXX}, bool{XXX}, true, bool{XXX}, bool{XXX}, false, bool{XXX}, false},
     {true, false, false, true, false, false, true, false, true}};
 
@@ -655,9 +668,10 @@ TEST_F(SegmentedReductionTestUntyped, PartialSegmentReduction)
 
   auto const input = fixed_width_column_wrapper<int32_t>{
     {1, 2, 3, 4, 5, 6, 7}, {true, true, true, true, true, true, true}};
-  auto const offsets   = std::vector<size_type>{1, 3, 4};
-  auto const d_offsets = thrust::device_vector<size_type>(offsets);
-  auto const expect    = fixed_width_column_wrapper<int32_t>{{5, 4}, {true, true}};
+  auto const offsets = std::vector<size_type>{1, 3, 4};
+  auto const d_offsets =
+    cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream());
+  auto const expect = fixed_width_column_wrapper<int32_t>{{5, 4}, {true, true}};
 
   auto res = segmented_reduce(input,
                               d_offsets,
@@ -702,9 +716,10 @@ TEST_F(SegmentedReductionTestUntyped, NonNullableInput)
   // outputs: {1, 5, 4}
   // output nullmask: {1, 1, 1}
 
-  auto const input     = fixed_width_column_wrapper<int32_t>{1, 2, 3, 4, 5, 6, 7};
-  auto const offsets   = std::vector<size_type>{0, 1, 1, 3, 7};
-  auto const d_offsets = thrust::device_vector<size_type>(offsets);
+  auto const input   = fixed_width_column_wrapper<int32_t>{1, 2, 3, 4, 5, 6, 7};
+  auto const offsets = std::vector<size_type>{0, 1, 1, 3, 7};
+  auto const d_offsets =
+    cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream());
   auto const expect =
     fixed_width_column_wrapper<int32_t>{{1, XXX, 5, 22}, {true, false, true, true}};
 
@@ -745,10 +760,11 @@ TEST_F(SegmentedReductionTestUntyped, NonNullableInput)
 
 TEST_F(SegmentedReductionTestUntyped, ReduceEmptyColumn)
 {
-  auto const input     = fixed_width_column_wrapper<int32_t>{};
-  auto const offsets   = std::vector<size_type>{0};
-  auto const d_offsets = thrust::device_vector<size_type>(offsets);
-  auto const expect    = fixed_width_column_wrapper<int32_t>{};
+  auto const input   = fixed_width_column_wrapper<int32_t>{};
+  auto const offsets = std::vector<size_type>{0};
+  auto const d_offsets =
+    cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream());
+  auto const expect = fixed_width_column_wrapper<int32_t>{};
 
   auto res = segmented_reduce(input,
                               d_offsets,
@@ -780,9 +796,10 @@ TEST_F(SegmentedReductionTestUntyped, ReduceEmptyColumn)
 
 TEST_F(SegmentedReductionTestUntyped, EmptyInputWithOffsets)
 {
-  auto const input     = fixed_width_column_wrapper<int32_t>{};
-  auto const offsets   = std::vector<size_type>{0, 0, 0, 0, 0, 0};
-  auto const d_offsets = thrust::device_vector<size_type>(offsets);
+  auto const input   = fixed_width_column_wrapper<int32_t>{};
+  auto const offsets = std::vector<size_type>{0, 0, 0, 0, 0, 0};
+  auto const d_offsets =
+    cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream());
   auto const expect =
     fixed_width_column_wrapper<int32_t>{{XXX, XXX, XXX, XXX, XXX}, {0, 0, 0, 0, 0}};
 
@@ -840,9 +857,10 @@ TYPED_TEST(SegmentedReductionFixedPointTest, MaxIncludeNulls)
                                                            {1, 1, 1, 1, 0, 1, 1, 0, 0, 0},
                                                            numeric::scale_type{scale});
     auto const offsets = std::vector<size_type>{0, 3, 6, 7, 8, 10, 10};
-    auto const d_offsets = thrust::device_vector<size_type>(offsets);
-    auto out_type        = column_view(input).type();
-    auto const expect    = fixed_point_column_wrapper<RepType>(
+    auto const d_offsets =
+      cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream());
+    auto out_type     = column_view(input).type();
+    auto const expect = fixed_point_column_wrapper<RepType>(
       {3, XXX, 1, XXX, XXX, XXX}, {1, 0, 1, 0, 0, 0}, numeric::scale_type{scale});
 
     auto res = segmented_reduce(input,
@@ -872,9 +890,10 @@ TYPED_TEST(SegmentedReductionFixedPointTest, MaxExcludeNulls)
                                                            {1, 1, 1, 1, 0, 1, 1, 0, 0, 0},
                                                            numeric::scale_type{scale});
     auto const offsets = std::vector<size_type>{0, 3, 6, 7, 8, 10, 10};
-    auto const d_offsets = thrust::device_vector<size_type>(offsets);
-    auto out_type        = column_view(input).type();
-    auto const expect    = fixed_point_column_wrapper<RepType>(
+    auto const d_offsets =
+      cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream());
+    auto out_type     = column_view(input).type();
+    auto const expect = fixed_point_column_wrapper<RepType>(
       {3, 3, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}, numeric::scale_type{scale});
 
     auto res = segmented_reduce(input,
@@ -904,9 +923,10 @@ TYPED_TEST(SegmentedReductionFixedPointTest, MinIncludeNulls)
                                                            {1, 1, 1, 1, 0, 1, 1, 0, 0, 0},
                                                            numeric::scale_type{scale});
     auto const offsets = std::vector<size_type>{0, 3, 6, 7, 8, 10, 10};
-    auto const d_offsets = thrust::device_vector<size_type>(offsets);
-    auto out_type        = column_view(input).type();
-    auto const expect    = fixed_point_column_wrapper<RepType>(
+    auto const d_offsets =
+      cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream());
+    auto out_type     = column_view(input).type();
+    auto const expect = fixed_point_column_wrapper<RepType>(
       {1, XXX, 1, XXX, XXX, XXX}, {1, 0, 1, 0, 0, 0}, numeric::scale_type{scale});
 
     auto res = segmented_reduce(input,
@@ -936,9 +956,10 @@ TYPED_TEST(SegmentedReductionFixedPointTest, MinExcludeNulls)
                                                            {1, 1, 1, 1, 0, 1, 1, 0, 0, 0},
                                                            numeric::scale_type{scale});
     auto const offsets = std::vector<size_type>{0, 3, 6, 7, 8, 10, 10};
-    auto const d_offsets = thrust::device_vector<size_type>(offsets);
-    auto out_type        = column_view(input).type();
-    auto const expect    = fixed_point_column_wrapper<RepType>(
+    auto const d_offsets =
+      cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream());
+    auto out_type     = column_view(input).type();
+    auto const expect = fixed_point_column_wrapper<RepType>(
       {1, 1, 1, XXX, XXX, XXX}, {1, 1, 1, 0, 0, 0}, numeric::scale_type{scale});
 
     auto res = segmented_reduce(input,
@@ -965,9 +986,10 @@ TYPED_TEST(SegmentedReductionFixedPointTest, MaxNonNullableInput)
   for (auto scale : {-2, 0, 5}) {
     auto const input =
       fixed_point_column_wrapper<RepType>({1, 2, 3, 1}, numeric::scale_type{scale});
-    auto const offsets   = std::vector<size_type>{0, 3, 4, 4};
-    auto const d_offsets = thrust::device_vector<size_type>(offsets);
-    auto out_type        = column_view(input).type();
+    auto const offsets = std::vector<size_type>{0, 3, 4, 4};
+    auto const d_offsets =
+      cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream());
+    auto out_type = column_view(input).type();
     auto const expect =
       fixed_point_column_wrapper<RepType>({3, 1, XXX}, {1, 1, 0}, numeric::scale_type{scale});
 
@@ -1002,9 +1024,10 @@ TYPED_TEST(SegmentedReductionFixedPointTest, MinNonNullableInput)
   for (auto scale : {-2, 0, 5}) {
     auto const input =
       fixed_point_column_wrapper<RepType>({1, 2, 3, 1}, numeric::scale_type{scale});
-    auto const offsets   = std::vector<size_type>{0, 3, 4, 4};
-    auto const d_offsets = thrust::device_vector<size_type>(offsets);
-    auto out_type        = column_view(input).type();
+    auto const offsets = std::vector<size_type>{0, 3, 4, 4};
+    auto const d_offsets =
+      cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream());
+    auto out_type = column_view(input).type();
     auto const expect =
       fixed_point_column_wrapper<RepType>({1, 1, XXX}, {1, 1, 0}, numeric::scale_type{scale});
 
@@ -1148,10 +1171,11 @@ TEST_F(SegmentedReductionStringTest, MinExcludeNulls)
 
 TEST_F(SegmentedReductionStringTest, EmptyInputWithOffsets)
 {
-  auto const input     = strings_column_wrapper{};
-  auto const offsets   = std::vector<size_type>{0, 0, 0, 0};
-  auto const d_offsets = thrust::device_vector<size_type>(offsets);
-  auto const expect    = strings_column_wrapper({XXX, XXX, XXX}, {0, 0, 0});
+  auto const input   = strings_column_wrapper{};
+  auto const offsets = std::vector<size_type>{0, 0, 0, 0};
+  auto const d_offsets =
+    cudf::detail::make_device_uvector_async(offsets, cudf::get_default_stream());
+  auto const expect = strings_column_wrapper({XXX, XXX, XXX}, {0, 0, 0});
 
   auto result = segmented_reduce(input,
                                  d_offsets,
diff --git a/cpp/tests/replace/replace_nulls_tests.cpp b/cpp/tests/replace/replace_nulls_tests.cpp
index 9624ab52865..ef4a9dea48c 100644
--- a/cpp/tests/replace/replace_nulls_tests.cpp
+++ b/cpp/tests/replace/replace_nulls_tests.cpp
@@ -176,8 +176,8 @@ TEST_F(ReplaceNullsStringsTest, SimpleReplaceScalar)
   std::vector<std::string> input{"", "", "", "", "", "", "", ""};
   std::vector<cudf::valid_type> input_v{0, 0, 0, 0, 0, 0, 0, 0};
   std::unique_ptr<cudf::scalar> repl =
-    cudf::make_string_scalar("rep", cudf::default_stream_value, mr());
-  repl->set_valid_async(true, cudf::default_stream_value);
+    cudf::make_string_scalar("rep", cudf::get_default_stream(), mr());
+  repl->set_valid_async(true, cudf::get_default_stream());
   std::vector<std::string> expected{"rep", "rep", "rep", "rep", "rep", "rep", "rep", "rep"};
 
   cudf::test::strings_column_wrapper input_w{input.begin(), input.end(), input_v.begin()};
diff --git a/cpp/tests/scalar/factories_test.cpp b/cpp/tests/scalar/factories_test.cpp
index b531623d548..73cf3479ac2 100644
--- a/cpp/tests/scalar/factories_test.cpp
+++ b/cpp/tests/scalar/factories_test.cpp
@@ -28,7 +28,7 @@
 
 class ScalarFactoryTest : public cudf::test::BaseFixture {
  public:
-  rmm::cuda_stream_view stream() { return cudf::default_stream_value; }
+  rmm::cuda_stream_view stream() { return cudf::get_default_stream(); }
 };
 
 template <typename T>
diff --git a/cpp/tests/scalar/scalar_device_view_test.cu b/cpp/tests/scalar/scalar_device_view_test.cu
index f4a1c94c3e6..1a0fea7219e 100644
--- a/cpp/tests/scalar/scalar_device_view_test.cu
+++ b/cpp/tests/scalar/scalar_device_view_test.cu
@@ -57,20 +57,20 @@ TYPED_TEST(TypedScalarDeviceViewTest, Value)
 
   auto scalar_device_view  = cudf::get_scalar_device_view(s);
   auto scalar_device_view1 = cudf::get_scalar_device_view(s1);
-  rmm::device_scalar<bool> result{cudf::default_stream_value};
+  rmm::device_scalar<bool> result{cudf::get_default_stream()};
 
-  test_set_value<<<1, 1, 0, cudf::default_stream_value.value()>>>(scalar_device_view,
+  test_set_value<<<1, 1, 0, cudf::get_default_stream().value()>>>(scalar_device_view,
                                                                   scalar_device_view1);
   CUDF_CHECK_CUDA(0);
 
   EXPECT_EQ(s1.value(), value);
   EXPECT_TRUE(s1.is_valid());
 
-  test_value<<<1, 1, 0, cudf::default_stream_value.value()>>>(
+  test_value<<<1, 1, 0, cudf::get_default_stream().value()>>>(
     scalar_device_view, scalar_device_view1, result.data());
   CUDF_CHECK_CUDA(0);
 
-  EXPECT_TRUE(result.value(cudf::default_stream_value));
+  EXPECT_TRUE(result.value(cudf::get_default_stream()));
 }
 
 template <typename ScalarDeviceViewType>
@@ -84,12 +84,12 @@ TYPED_TEST(TypedScalarDeviceViewTest, ConstructNull)
   TypeParam value = cudf::test::make_type_param_scalar<TypeParam>(5);
   cudf::scalar_type_t<TypeParam> s(value, false);
   auto scalar_device_view = cudf::get_scalar_device_view(s);
-  rmm::device_scalar<bool> result{cudf::default_stream_value};
+  rmm::device_scalar<bool> result{cudf::get_default_stream()};
 
-  test_null<<<1, 1, 0, cudf::default_stream_value.value()>>>(scalar_device_view, result.data());
+  test_null<<<1, 1, 0, cudf::get_default_stream().value()>>>(scalar_device_view, result.data());
   CUDF_CHECK_CUDA(0);
 
-  EXPECT_FALSE(result.value(cudf::default_stream_value));
+  EXPECT_FALSE(result.value(cudf::get_default_stream()));
 }
 
 template <typename ScalarDeviceViewType>
@@ -106,7 +106,7 @@ TYPED_TEST(TypedScalarDeviceViewTest, SetNull)
   s.set_valid_async(true);
   EXPECT_TRUE(s.is_valid());
 
-  test_setnull<<<1, 1, 0, cudf::default_stream_value.value()>>>(scalar_device_view);
+  test_setnull<<<1, 1, 0, cudf::get_default_stream().value()>>>(scalar_device_view);
   CUDF_CHECK_CUDA(0);
 
   EXPECT_FALSE(s.is_valid());
@@ -129,12 +129,12 @@ TEST_F(StringScalarDeviceViewTest, Value)
   cudf::string_scalar s(value);
 
   auto scalar_device_view = cudf::get_scalar_device_view(s);
-  rmm::device_scalar<bool> result{cudf::default_stream_value};
+  rmm::device_scalar<bool> result{cudf::get_default_stream()};
   auto value_v = cudf::detail::make_device_uvector_sync(value);
 
-  test_string_value<<<1, 1, 0, cudf::default_stream_value.value()>>>(
+  test_string_value<<<1, 1, 0, cudf::get_default_stream().value()>>>(
     scalar_device_view, value_v.data(), value.size(), result.data());
   CUDF_CHECK_CUDA(0);
 
-  EXPECT_TRUE(result.value(cudf::default_stream_value));
+  EXPECT_TRUE(result.value(cudf::get_default_stream()));
 }
diff --git a/cpp/tests/stream_compaction/apply_boolean_mask_tests.cpp b/cpp/tests/stream_compaction/apply_boolean_mask_tests.cpp
index 99d5c90d1a4..2f8bfa847fa 100644
--- a/cpp/tests/stream_compaction/apply_boolean_mask_tests.cpp
+++ b/cpp/tests/stream_compaction/apply_boolean_mask_tests.cpp
@@ -273,7 +273,7 @@ TEST_F(ApplyBooleanMask, CorrectNullCount)
   auto got     = cudf::apply_boolean_mask(input, boolean_mask);
   auto out_col = got->get_column(0).view();
   auto expected_null_count =
-    cudf::detail::null_count(out_col.null_mask(), 0, out_col.size(), cudf::default_stream_value);
+    cudf::detail::null_count(out_col.null_mask(), 0, out_col.size(), cudf::get_default_stream());
 
   ASSERT_EQ(out_col.null_count(), expected_null_count);
 }
diff --git a/cpp/tests/strings/datetime_tests.cpp b/cpp/tests/strings/datetime_tests.cpp
index 26beaf9756a..dc42fb283dd 100644
--- a/cpp/tests/strings/datetime_tests.cpp
+++ b/cpp/tests/strings/datetime_tests.cpp
@@ -430,12 +430,14 @@ TEST_F(StringsDatetimeTest, FromTimestampDayOfYear)
 
 // Format names used for some specifiers in from_timestamps
 // clang-format off
-cudf::test::strings_column_wrapper format_names({"AM", "PM",
-  "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday",
-  "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat",
-  "January", "February", "March", "April", "May", "June", "July",
-  "August", "September", "October", "November", "December",
-  "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"});
+cudf::test::strings_column_wrapper format_names() {
+  return cudf::test::strings_column_wrapper({"AM", "PM",
+    "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday",
+    "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat",
+    "January", "February", "March", "April", "May", "June", "July",
+    "August", "September", "October", "November", "December",
+    "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"});
+}
 // clang-format on
 
 TEST_F(StringsDatetimeTest, FromTimestampDayOfWeekOfYear)
@@ -492,8 +494,9 @@ TEST_F(StringsDatetimeTest, FromTimestampDayOfWeekOfYear)
      "[Fri 01, Jan 1982  5  00  5  00  1981  53]", "[Sat 02, Jan 1982  6  00  6  00  1981  53]",
      "[Sun 03, Jan 1982  0  00  7  01  1981  53]"});
 
-  auto results = cudf::strings::from_timestamps(
-    timestamps, "[%a %d, %b %Y  %w  %W  %u  %U  %G  %V]", cudf::strings_column_view(format_names));
+  auto results = cudf::strings::from_timestamps(timestamps,
+                                                "[%a %d, %b %Y  %w  %W  %u  %U  %G  %V]",
+                                                cudf::strings_column_view(format_names()));
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
 }
 
@@ -528,7 +531,7 @@ TEST_F(StringsDatetimeTest, FromTimestampWeekdayMonthYear)
                                                "[Monday December 06, 2021: 02 AM]"});
 
   auto results = cudf::strings::from_timestamps(
-    timestamps, "[%A %B %d, %Y: %I %p]", cudf::strings_column_view(format_names));
+    timestamps, "[%A %B %d, %Y: %I %p]", cudf::strings_column_view(format_names()));
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
 }
 
@@ -549,7 +552,7 @@ TEST_F(StringsDatetimeTest, FromTimestampAllSpecifiers)
   auto results = cudf::strings::from_timestamps(
     input,
     "[%d/%m/%y/%Y %H:%I:%M:%S.%f %z:%Z %j %u %U %W %V %G %p %a %A %b %B]",
-    cudf::strings_column_view(format_names));
+    cudf::strings_column_view(format_names()));
 
   // clang-format off
   cudf::test::strings_column_wrapper expected({
diff --git a/cpp/tests/strings/factories_test.cu b/cpp/tests/strings/factories_test.cu
index a381c1cff89..c27f48a9069 100644
--- a/cpp/tests/strings/factories_test.cu
+++ b/cpp/tests/strings/factories_test.cu
@@ -59,7 +59,7 @@ TEST_F(StringsFactoriesTest, CreateColumnFromPair)
     memsize += *itr ? (cudf::size_type)strlen(*itr) : 0;
   cudf::size_type count = (cudf::size_type)h_test_strings.size();
   thrust::host_vector<char> h_buffer(memsize);
-  rmm::device_uvector<char> d_buffer(memsize, cudf::default_stream_value);
+  rmm::device_uvector<char> d_buffer(memsize, cudf::get_default_stream());
   thrust::host_vector<thrust::pair<const char*, cudf::size_type>> strings(count);
   thrust::host_vector<cudf::size_type> h_offsets(count + 1);
   cudf::size_type offset = 0;
@@ -97,12 +97,12 @@ TEST_F(StringsFactoriesTest, CreateColumnFromPair)
   // check string data
   auto h_chars_data = cudf::detail::make_std_vector_sync(
     cudf::device_span<char const>(strings_view.chars().data<char>(), strings_view.chars().size()),
-    cudf::default_stream_value);
+    cudf::get_default_stream());
   auto h_offsets_data = cudf::detail::make_std_vector_sync(
     cudf::device_span<cudf::offset_type const>(
       strings_view.offsets().data<cudf::offset_type>() + strings_view.offset(),
       strings_view.size() + 1),
-    cudf::default_stream_value);
+    cudf::get_default_stream());
   EXPECT_EQ(memcmp(h_buffer.data(), h_chars_data.data(), h_buffer.size()), 0);
   EXPECT_EQ(
     memcmp(h_offsets.data(), h_offsets_data.data(), h_offsets.size() * sizeof(cudf::size_type)), 0);
@@ -159,12 +159,12 @@ TEST_F(StringsFactoriesTest, CreateColumnFromOffsets)
   // check string data
   auto h_chars_data = cudf::detail::make_std_vector_sync(
     cudf::device_span<char const>(strings_view.chars().data<char>(), strings_view.chars().size()),
-    cudf::default_stream_value);
+    cudf::get_default_stream());
   auto h_offsets_data = cudf::detail::make_std_vector_sync(
     cudf::device_span<cudf::offset_type const>(
       strings_view.offsets().data<cudf::offset_type>() + strings_view.offset(),
       strings_view.size() + 1),
-    cudf::default_stream_value);
+    cudf::get_default_stream());
   EXPECT_EQ(memcmp(h_buffer.data(), h_chars_data.data(), h_buffer.size()), 0);
   EXPECT_EQ(
     memcmp(h_offsets.data(), h_offsets_data.data(), h_offsets.size() * sizeof(cudf::size_type)), 0);
@@ -183,15 +183,15 @@ TEST_F(StringsFactoriesTest, CreateScalar)
 
 TEST_F(StringsFactoriesTest, EmptyStringsColumn)
 {
-  rmm::device_uvector<char> d_chars{0, cudf::default_stream_value};
+  rmm::device_uvector<char> d_chars{0, cudf::get_default_stream()};
   auto d_offsets = cudf::detail::make_zeroed_device_uvector_sync<cudf::size_type>(1);
-  rmm::device_uvector<cudf::bitmask_type> d_nulls{0, cudf::default_stream_value};
+  rmm::device_uvector<cudf::bitmask_type> d_nulls{0, cudf::get_default_stream()};
 
   auto results = cudf::make_strings_column(d_chars, d_offsets, d_nulls, 0);
   cudf::test::expect_column_empty(results->view());
 
   rmm::device_uvector<thrust::pair<const char*, cudf::size_type>> d_strings{
-    0, cudf::default_stream_value};
+    0, cudf::get_default_stream()};
   results = cudf::make_strings_column(d_strings);
   cudf::test::expect_column_empty(results->view());
 }
@@ -213,8 +213,8 @@ TEST_F(StringsFactoriesTest, StringPairWithNullsAndEmpty)
     {0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1});
 
   auto d_column = cudf::column_device_view::create(data);
-  rmm::device_uvector<string_pair> pairs(d_column->size(), cudf::default_stream_value);
-  thrust::transform(rmm::exec_policy(cudf::default_stream_value),
+  rmm::device_uvector<string_pair> pairs(d_column->size(), cudf::get_default_stream());
+  thrust::transform(rmm::exec_policy(cudf::get_default_stream()),
                     d_column->pair_begin<cudf::string_view, true>(),
                     d_column->pair_end<cudf::string_view, true>(),
                     pairs.data(),
diff --git a/cpp/tests/table/experimental_row_operator_tests.cu b/cpp/tests/table/experimental_row_operator_tests.cu
index 0566f55e46d..427d819ace3 100644
--- a/cpp/tests/table/experimental_row_operator_tests.cu
+++ b/cpp/tests/table/experimental_row_operator_tests.cu
@@ -51,7 +51,7 @@ auto self_comparison(cudf::table_view input,
                      std::vector<cudf::order> const& column_order,
                      PhysicalElementComparator comparator)
 {
-  rmm::cuda_stream_view stream{cudf::default_stream_value};
+  rmm::cuda_stream_view stream{cudf::get_default_stream()};
 
   auto const table_comparator = lexicographic::self_comparator{input, column_order, {}, stream};
 
@@ -82,7 +82,7 @@ auto two_table_comparison(cudf::table_view lhs,
                           std::vector<cudf::order> const& column_order,
                           PhysicalElementComparator comparator)
 {
-  rmm::cuda_stream_view stream{cudf::default_stream_value};
+  rmm::cuda_stream_view stream{cudf::get_default_stream()};
 
   auto const table_comparator =
     lexicographic::two_table_comparator{lhs, rhs, column_order, {}, stream};
@@ -115,7 +115,7 @@ auto self_equality(cudf::table_view input,
                    std::vector<cudf::order> const& column_order,
                    PhysicalElementComparator comparator)
 {
-  rmm::cuda_stream_view stream{cudf::default_stream_value};
+  rmm::cuda_stream_view stream{cudf::get_default_stream()};
 
   auto const table_comparator = equality::self_comparator{input, stream};
   auto const equal_comparator =
@@ -139,7 +139,7 @@ auto two_table_equality(cudf::table_view lhs,
                         std::vector<cudf::order> const& column_order,
                         PhysicalElementComparator comparator)
 {
-  rmm::cuda_stream_view stream{cudf::default_stream_value};
+  rmm::cuda_stream_view stream{cudf::get_default_stream()};
 
   auto const table_comparator = equality::two_table_comparator{lhs, rhs, stream};
   auto const equal_comparator =
diff --git a/cpp/tests/table/table_view_tests.cu b/cpp/tests/table/table_view_tests.cu
index d678e659f79..a092006bda6 100644
--- a/cpp/tests/table/table_view_tests.cu
+++ b/cpp/tests/table/table_view_tests.cu
@@ -43,7 +43,7 @@ void row_comparison(cudf::table_view input1,
                     cudf::mutable_column_view output,
                     std::vector<cudf::order> const& column_order)
 {
-  rmm::cuda_stream_view stream{cudf::default_stream_value};
+  rmm::cuda_stream_view stream{cudf::get_default_stream()};
 
   auto device_table_1 = cudf::table_device_view::create(input1, stream);
   auto device_table_2 = cudf::table_device_view::create(input2, stream);
diff --git a/cpp/tests/transform/row_bit_count_test.cu b/cpp/tests/transform/row_bit_count_test.cu
index 8151e0d6d8d..b1cfc7a39d1 100644
--- a/cpp/tests/transform/row_bit_count_test.cu
+++ b/cpp/tests/transform/row_bit_count_test.cu
@@ -53,7 +53,7 @@ TYPED_TEST(RowBitCountTyped, SimpleTypes)
   // expect size of the type per row
   auto expected = make_fixed_width_column(data_type{type_id::INT32}, 16);
   cudf::mutable_column_view mcv(*expected);
-  thrust::fill(rmm::exec_policy(cudf::default_stream_value),
+  thrust::fill(rmm::exec_policy(cudf::get_default_stream()),
                mcv.begin<size_type>(),
                mcv.end<size_type>(),
                sizeof(device_storage_type_t<T>) * CHAR_BIT);
@@ -76,7 +76,7 @@ TYPED_TEST(RowBitCountTyped, SimpleTypesWithNulls)
   // expect size of the type + 1 bit per row
   auto expected = make_fixed_width_column(data_type{type_id::INT32}, 16);
   cudf::mutable_column_view mcv(*expected);
-  thrust::fill(rmm::exec_policy(cudf::default_stream_value),
+  thrust::fill(rmm::exec_policy(cudf::get_default_stream()),
                mcv.begin<size_type>(),
                mcv.end<size_type>(),
                (sizeof(device_storage_type_t<T>) * CHAR_BIT) + 1);
@@ -240,7 +240,7 @@ TEST_F(RowBitCount, StructsWithLists_RowsExceedingASingleBlock)
   // List child column = {0, 1, 2, 3, 4, ..., 2*num_rows};
   auto ints      = make_numeric_column(data_type{type_id::INT32}, num_rows * 2);
   auto ints_view = ints->mutable_view();
-  thrust::tabulate(rmm::exec_policy(cudf::default_stream_value),
+  thrust::tabulate(rmm::exec_policy(cudf::get_default_stream()),
                    ints_view.begin<int32_t>(),
                    ints_view.end<int32_t>(),
                    thrust::identity{});
@@ -248,7 +248,7 @@ TEST_F(RowBitCount, StructsWithLists_RowsExceedingASingleBlock)
   // List offsets = {0, 2, 4, 6, 8, ..., num_rows*2};
   auto list_offsets      = make_numeric_column(data_type{type_id::INT32}, num_rows + 1);
   auto list_offsets_view = list_offsets->mutable_view();
-  thrust::tabulate(rmm::exec_policy(cudf::default_stream_value),
+  thrust::tabulate(rmm::exec_policy(cudf::get_default_stream()),
                    list_offsets_view.begin<offset_type>(),
                    list_offsets_view.end<offset_type>(),
                    times_2{});
@@ -264,7 +264,7 @@ TEST_F(RowBitCount, StructsWithLists_RowsExceedingASingleBlock)
   // Compute row_bit_count, and compare.
   auto row_bit_counts          = row_bit_count(table_view{{structs_column->view()}});
   auto expected_row_bit_counts = make_numeric_column(data_type{type_id::INT32}, num_rows);
-  thrust::fill_n(rmm::exec_policy(cudf::default_stream_value),
+  thrust::fill_n(rmm::exec_policy(cudf::get_default_stream()),
                  expected_row_bit_counts->mutable_view().begin<int32_t>(),
                  num_rows,
                  CHAR_BIT * (2 * sizeof(int32_t) + sizeof(offset_type)));
@@ -613,7 +613,7 @@ TEST_F(RowBitCount, Table)
   auto expected   = cudf::make_fixed_width_column(data_type{type_id::INT32}, t.num_rows());
   cudf::mutable_column_view mcv(*expected);
   thrust::transform(
-    rmm::exec_policy(cudf::default_stream_value),
+    rmm::exec_policy(cudf::get_default_stream()),
     thrust::make_counting_iterator(0),
     thrust::make_counting_iterator(0) + t.num_rows(),
     mcv.begin<size_type>(),
diff --git a/cpp/tests/types/type_dispatcher_test.cu b/cpp/tests/types/type_dispatcher_test.cu
index 3280339ea85..e3856759cfc 100644
--- a/cpp/tests/types/type_dispatcher_test.cu
+++ b/cpp/tests/types/type_dispatcher_test.cu
@@ -70,10 +70,10 @@ __global__ void dispatch_test_kernel(cudf::type_id id, bool* d_result)
 TYPED_TEST(TypedDispatcherTest, DeviceDispatch)
 {
   auto result = cudf::detail::make_zeroed_device_uvector_sync<bool>(1);
-  dispatch_test_kernel<<<1, 1, 0, cudf::default_stream_value.value()>>>(
+  dispatch_test_kernel<<<1, 1, 0, cudf::get_default_stream().value()>>>(
     cudf::type_to_id<TypeParam>(), result.data());
   CUDF_CUDA_TRY(cudaDeviceSynchronize());
-  EXPECT_EQ(true, result.front_element(cudf::default_stream_value));
+  EXPECT_EQ(true, result.front_element(cudf::get_default_stream()));
 }
 
 struct IdDispatcherTest : public DispatcherTest, public testing::WithParamInterface<cudf::type_id> {
@@ -131,10 +131,10 @@ __global__ void double_dispatch_test_kernel(cudf::type_id id1, cudf::type_id id2
 TYPED_TEST(TypedDoubleDispatcherTest, DeviceDoubleDispatch)
 {
   auto result = cudf::detail::make_zeroed_device_uvector_sync<bool>(1);
-  double_dispatch_test_kernel<<<1, 1, 0, cudf::default_stream_value.value()>>>(
+  double_dispatch_test_kernel<<<1, 1, 0, cudf::get_default_stream().value()>>>(
     cudf::type_to_id<TypeParam>(), cudf::type_to_id<TypeParam>(), result.data());
   CUDF_CUDA_TRY(cudaDeviceSynchronize());
-  EXPECT_EQ(true, result.front_element(cudf::default_stream_value));
+  EXPECT_EQ(true, result.front_element(cudf::get_default_stream()));
 }
 
 struct IdDoubleDispatcherTest : public DispatcherTest,
diff --git a/cpp/tests/unary/cast_tests.cpp b/cpp/tests/unary/cast_tests.cpp
index fd9211a56e5..ac68a277622 100644
--- a/cpp/tests/unary/cast_tests.cpp
+++ b/cpp/tests/unary/cast_tests.cpp
@@ -90,70 +90,70 @@ inline cudf::column make_exp_chrono_column(cudf::type_id type_id)
         test_timestamps_D.size(),
         rmm::device_buffer{test_timestamps_D.data(),
                            test_timestamps_D.size() * sizeof(test_timestamps_D.front()),
-                           cudf::default_stream_value});
+                           cudf::get_default_stream()});
     case cudf::type_id::TIMESTAMP_SECONDS:
       return cudf::column(
         cudf::data_type{type_id},
         test_timestamps_s.size(),
         rmm::device_buffer{test_timestamps_s.data(),
                            test_timestamps_s.size() * sizeof(test_timestamps_s.front()),
-                           cudf::default_stream_value});
+                           cudf::get_default_stream()});
     case cudf::type_id::TIMESTAMP_MILLISECONDS:
       return cudf::column(
         cudf::data_type{type_id},
         test_timestamps_ms.size(),
         rmm::device_buffer{test_timestamps_ms.data(),
                            test_timestamps_ms.size() * sizeof(test_timestamps_ms.front()),
-                           cudf::default_stream_value});
+                           cudf::get_default_stream()});
     case cudf::type_id::TIMESTAMP_MICROSECONDS:
       return cudf::column(
         cudf::data_type{type_id},
         test_timestamps_us.size(),
         rmm::device_buffer{test_timestamps_us.data(),
                            test_timestamps_us.size() * sizeof(test_timestamps_us.front()),
-                           cudf::default_stream_value});
+                           cudf::get_default_stream()});
     case cudf::type_id::TIMESTAMP_NANOSECONDS:
       return cudf::column(
         cudf::data_type{type_id},
         test_timestamps_ns.size(),
         rmm::device_buffer{test_timestamps_ns.data(),
                            test_timestamps_ns.size() * sizeof(test_timestamps_ns.front()),
-                           cudf::default_stream_value});
+                           cudf::get_default_stream()});
     case cudf::type_id::DURATION_DAYS:
       return cudf::column(
         cudf::data_type{type_id},
         test_durations_D.size(),
         rmm::device_buffer{test_durations_D.data(),
                            test_durations_D.size() * sizeof(test_durations_D.front()),
-                           cudf::default_stream_value});
+                           cudf::get_default_stream()});
     case cudf::type_id::DURATION_SECONDS:
       return cudf::column(
         cudf::data_type{type_id},
         test_durations_s.size(),
         rmm::device_buffer{test_durations_s.data(),
                            test_durations_s.size() * sizeof(test_durations_s.front()),
-                           cudf::default_stream_value});
+                           cudf::get_default_stream()});
     case cudf::type_id::DURATION_MILLISECONDS:
       return cudf::column(
         cudf::data_type{type_id},
         test_durations_ms.size(),
         rmm::device_buffer{test_durations_ms.data(),
                            test_durations_ms.size() * sizeof(test_durations_ms.front()),
-                           cudf::default_stream_value});
+                           cudf::get_default_stream()});
     case cudf::type_id::DURATION_MICROSECONDS:
       return cudf::column(
         cudf::data_type{type_id},
         test_durations_us.size(),
         rmm::device_buffer{test_durations_us.data(),
                            test_durations_us.size() * sizeof(test_durations_us.front()),
-                           cudf::default_stream_value});
+                           cudf::get_default_stream()});
     case cudf::type_id::DURATION_NANOSECONDS:
       return cudf::column(
         cudf::data_type{type_id},
         test_durations_ns.size(),
         rmm::device_buffer{test_durations_ns.data(),
                            test_durations_ns.size() * sizeof(test_durations_ns.front()),
-                           cudf::default_stream_value});
+                           cudf::get_default_stream()});
     default: CUDF_FAIL("Unsupported type_id");
   }
 };
diff --git a/cpp/tests/utilities/column_utilities.cu b/cpp/tests/utilities/column_utilities.cu
index d0fc92b0bb5..080bb3ef916 100644
--- a/cpp/tests/utilities/column_utilities.cu
+++ b/cpp/tests/utilities/column_utilities.cu
@@ -66,7 +66,7 @@ std::unique_ptr<column> generate_all_row_indices(size_type num_rows)
 {
   auto indices =
     cudf::make_fixed_width_column(data_type{type_id::INT32}, num_rows, mask_state::UNALLOCATED);
-  thrust::sequence(rmm::exec_policy(cudf::default_stream_value),
+  thrust::sequence(rmm::exec_policy(cudf::get_default_stream()),
                    indices->mutable_view().begin<size_type>(),
                    indices->mutable_view().end<size_type>(),
                    0);
@@ -103,7 +103,7 @@ std::unique_ptr<column> generate_child_row_indices(lists_column_view const& c,
   // if we are checking for exact equality, we should be checking for "unsanitized" data that may
   // be hiding underneath nulls. so check all rows instead of just non-null rows
   if (check_exact_equality) {
-    return generate_all_row_indices(c.get_sliced_child(cudf::default_stream_value).size());
+    return generate_all_row_indices(c.get_sliced_child(cudf::get_default_stream()).size());
   }
 
   // Example input
@@ -132,7 +132,7 @@ std::unique_ptr<column> generate_child_row_indices(lists_column_view const& c,
                ? (offsets[true_index + 1] - offsets[true_index])
                : 0;
     });
-  auto const output_size = thrust::reduce(rmm::exec_policy(cudf::default_stream_value),
+  auto const output_size = thrust::reduce(rmm::exec_policy(cudf::get_default_stream()),
                                           row_size_iter,
                                           row_size_iter + row_indices.size());
   // no output. done.
@@ -147,7 +147,7 @@ std::unique_ptr<column> generate_child_row_indices(lists_column_view const& c,
   //
   auto output_row_start = cudf::make_fixed_width_column(
     data_type{type_id::INT32}, row_indices.size(), mask_state::UNALLOCATED);
-  thrust::exclusive_scan(rmm::exec_policy(cudf::default_stream_value),
+  thrust::exclusive_scan(rmm::exec_policy(cudf::get_default_stream()),
                          row_size_iter,
                          row_size_iter + row_indices.size(),
                          output_row_start->mutable_view().begin<size_type>());
@@ -156,7 +156,7 @@ std::unique_ptr<column> generate_child_row_indices(lists_column_view const& c,
   //
   // result = [1, 1, 1, 1, 1]
   //
-  thrust::generate(rmm::exec_policy(cudf::default_stream_value),
+  thrust::generate(rmm::exec_policy(cudf::get_default_stream()),
                    result->mutable_view().begin<size_type>(),
                    result->mutable_view().end<size_type>(),
                    [] __device__() { return 1; });
@@ -171,11 +171,11 @@ std::unique_ptr<column> generate_child_row_indices(lists_column_view const& c,
      offsets      = c.offsets().begin<offset_type>(),
      offset       = c.offset(),
      first_offset = cudf::detail::get_value<offset_type>(
-       c.offsets(), c.offset(), cudf::default_stream_value)] __device__(int index) {
+       c.offsets(), c.offset(), cudf::get_default_stream())] __device__(int index) {
       auto const true_index = row_indices[index] + offset;
       return offsets[true_index] - first_offset;
     });
-  thrust::scatter_if(rmm::exec_policy(cudf::default_stream_value),
+  thrust::scatter_if(rmm::exec_policy(cudf::get_default_stream()),
                      output_row_iter,
                      output_row_iter + row_indices.size(),
                      output_row_start->view().begin<size_type>(),
@@ -189,18 +189,18 @@ std::unique_ptr<column> generate_child_row_indices(lists_column_view const& c,
   //
   auto keys =
     cudf::make_fixed_width_column(data_type{type_id::INT32}, output_size, mask_state::UNALLOCATED);
-  thrust::generate(rmm::exec_policy(cudf::default_stream_value),
+  thrust::generate(rmm::exec_policy(cudf::get_default_stream()),
                    keys->mutable_view().begin<size_type>(),
                    keys->mutable_view().end<size_type>(),
                    [] __device__() { return 0; });
-  thrust::scatter_if(rmm::exec_policy(cudf::default_stream_value),
+  thrust::scatter_if(rmm::exec_policy(cudf::get_default_stream()),
                      row_size_iter,
                      row_size_iter + row_indices.size(),
                      output_row_start->view().begin<size_type>(),
                      row_size_iter,
                      keys->mutable_view().begin<size_type>(),
                      [] __device__(auto row_size) { return row_size != 0; });
-  thrust::inclusive_scan(rmm::exec_policy(cudf::default_stream_value),
+  thrust::inclusive_scan(rmm::exec_policy(cudf::get_default_stream()),
                          keys->view().begin<size_type>(),
                          keys->view().end<size_type>(),
                          keys->mutable_view().begin<size_type>());
@@ -213,7 +213,7 @@ std::unique_ptr<column> generate_child_row_indices(lists_column_view const& c,
   // output
   //    result = [6, 7, 11, 12, 13]
   //
-  thrust::inclusive_scan_by_key(rmm::exec_policy(cudf::default_stream_value),
+  thrust::inclusive_scan_by_key(rmm::exec_policy(cudf::get_default_stream()),
                                 keys->view().begin<size_type>(),
                                 keys->view().end<size_type>(),
                                 result->view().begin<size_type>(),
@@ -256,7 +256,7 @@ struct column_property_comparator {
         auto const true_index = row_indices[index] + offset;
         return !validity || cudf::bit_is_set(validity, true_index) ? 0 : 1;
       });
-    return thrust::reduce(rmm::exec_policy(cudf::default_stream_value),
+    return thrust::reduce(rmm::exec_policy(cudf::get_default_stream()),
                           validity_iter,
                           validity_iter + row_indices.size());
   }
@@ -328,8 +328,8 @@ struct column_property_comparator {
     auto lhs_child_indices =
       generate_child_row_indices(lhs_l, lhs_row_indices, check_exact_equality);
     if (lhs_child_indices->size() > 0) {
-      auto lhs_child = lhs_l.get_sliced_child(cudf::default_stream_value);
-      auto rhs_child = rhs_l.get_sliced_child(cudf::default_stream_value);
+      auto lhs_child = lhs_l.get_sliced_child(cudf::get_default_stream());
+      auto rhs_child = rhs_l.get_sliced_child(cudf::get_default_stream());
       auto rhs_child_indices =
         generate_child_row_indices(rhs_l, rhs_row_indices, check_exact_equality);
       return cudf::type_dispatcher(lhs_child.type(),
@@ -516,9 +516,9 @@ std::string stringify_column_differences(cudf::device_span<int const> difference
     auto const index = h_differences[0];  // only stringify first difference
 
     auto const lhs_index =
-      cudf::detail::get_value<size_type>(lhs_row_indices, index, cudf::default_stream_value);
+      cudf::detail::get_value<size_type>(lhs_row_indices, index, cudf::get_default_stream());
     auto const rhs_index =
-      cudf::detail::get_value<size_type>(rhs_row_indices, index, cudf::default_stream_value);
+      cudf::detail::get_value<size_type>(rhs_row_indices, index, cudf::get_default_stream());
     auto diff_lhs = cudf::detail::slice(lhs, lhs_index, lhs_index + 1);
     auto diff_rhs = cudf::detail::slice(rhs, rhs_index, rhs_index + 1);
     return depth_str + "first difference: " + "lhs[" + std::to_string(index) +
@@ -549,17 +549,17 @@ struct column_comparator_impl {
                                               corresponding_rows_not_equivalent>;
 
     auto differences = rmm::device_uvector<int>(
-      lhs.size(), cudf::default_stream_value);  // worst case: everything different
+      lhs.size(), cudf::get_default_stream());  // worst case: everything different
     auto input_iter = thrust::make_counting_iterator(0);
     auto diff_iter  = thrust::copy_if(
-      rmm::exec_policy(cudf::default_stream_value),
+      rmm::exec_policy(cudf::get_default_stream()),
       input_iter,
       input_iter + lhs_row_indices.size(),
       differences.begin(),
       ComparatorType(*d_lhs, *d_rhs, *d_lhs_row_indices, *d_rhs_row_indices, fp_ulps));
 
     differences.resize(thrust::distance(differences.begin(), diff_iter),
-                       cudf::default_stream_value);  // shrink back down
+                       cudf::get_default_stream());  // shrink back down
 
     if (not differences.is_empty()) {
       if (verbosity != debug_output_level::QUIET) {
@@ -597,13 +597,13 @@ struct column_comparator_impl<list_view, check_exact_equality> {
     if (lhs_row_indices.is_empty()) { return true; }
 
     // worst case - everything is different
-    rmm::device_uvector<int> differences(lhs_row_indices.size(), cudf::default_stream_value);
+    rmm::device_uvector<int> differences(lhs_row_indices.size(), cudf::get_default_stream());
 
     // compare offsets, taking slicing into account
 
     // left side
     size_type lhs_shift = cudf::detail::get_value<size_type>(
-      lhs_l.offsets(), lhs_l.offset(), cudf::default_stream_value);
+      lhs_l.offsets(), lhs_l.offset(), cudf::get_default_stream());
     auto lhs_offsets = thrust::make_transform_iterator(
       lhs_l.offsets().begin<size_type>() + lhs_l.offset(),
       [lhs_shift] __device__(size_type offset) { return offset - lhs_shift; });
@@ -615,7 +615,7 @@ struct column_comparator_impl<list_view, check_exact_equality> {
 
     // right side
     size_type rhs_shift = cudf::detail::get_value<size_type>(
-      rhs_l.offsets(), rhs_l.offset(), cudf::default_stream_value);
+      rhs_l.offsets(), rhs_l.offset(), cudf::get_default_stream());
     auto rhs_offsets = thrust::make_transform_iterator(
       rhs_l.offsets().begin<size_type>() + rhs_l.offset(),
       [rhs_shift] __device__(size_type offset) { return offset - rhs_shift; });
@@ -643,7 +643,7 @@ struct column_comparator_impl<list_view, check_exact_equality> {
     //
     auto input_iter = thrust::make_counting_iterator(0);
     auto diff_iter  = thrust::copy_if(
-      rmm::exec_policy(cudf::default_stream_value),
+      rmm::exec_policy(cudf::get_default_stream()),
       input_iter,
       input_iter + lhs_row_indices.size(),
       differences.begin(),
@@ -679,7 +679,7 @@ struct column_comparator_impl<list_view, check_exact_equality> {
       });
 
     differences.resize(thrust::distance(differences.begin(), diff_iter),
-                       cudf::default_stream_value);  // shrink back down
+                       cudf::get_default_stream());  // shrink back down
 
     if (not differences.is_empty()) {
       if (verbosity != debug_output_level::QUIET) {
@@ -698,8 +698,8 @@ struct column_comparator_impl<list_view, check_exact_equality> {
     auto lhs_child_indices =
       generate_child_row_indices(lhs_l, lhs_row_indices, check_exact_equality);
     if (lhs_child_indices->size() > 0) {
-      auto lhs_child = lhs_l.get_sliced_child(cudf::default_stream_value);
-      auto rhs_child = rhs_l.get_sliced_child(cudf::default_stream_value);
+      auto lhs_child = lhs_l.get_sliced_child(cudf::get_default_stream());
+      auto rhs_child = rhs_l.get_sliced_child(cudf::get_default_stream());
       auto rhs_child_indices =
         generate_child_row_indices(rhs_l, rhs_row_indices, check_exact_equality);
       return cudf::type_dispatcher(lhs_child.type(),
@@ -875,7 +875,7 @@ void expect_equal_buffers(void const* lhs, void const* rhs, std::size_t size_byt
   auto typed_lhs = static_cast<char const*>(lhs);
   auto typed_rhs = static_cast<char const*>(rhs);
   EXPECT_TRUE(thrust::equal(
-    rmm::exec_policy(cudf::default_stream_value), typed_lhs, typed_lhs + size_bytes, typed_rhs));
+    rmm::exec_policy(cudf::get_default_stream()), typed_lhs, typed_lhs + size_bytes, typed_rhs));
 }
 
 /**
@@ -964,13 +964,13 @@ std::string nested_offsets_to_string(NestedColumnView const& c, std::string cons
 
   // the first offset value to normalize everything against
   size_type first =
-    cudf::detail::get_value<size_type>(offsets, c.offset(), cudf::default_stream_value);
-  rmm::device_uvector<size_type> shifted_offsets(output_size, cudf::default_stream_value);
+    cudf::detail::get_value<size_type>(offsets, c.offset(), cudf::get_default_stream());
+  rmm::device_uvector<size_type> shifted_offsets(output_size, cudf::get_default_stream());
 
   // normalize the offset values for the column offset
   size_type const* d_offsets = offsets.head<size_type>() + c.offset();
   thrust::transform(
-    rmm::exec_policy(cudf::default_stream_value),
+    rmm::exec_policy(cudf::get_default_stream()),
     d_offsets,
     d_offsets + output_size,
     shifted_offsets.begin(),
@@ -1146,7 +1146,7 @@ struct column_view_printer {
     lists_column_view lcv(col);
 
     // propagate slicing to the child if necessary
-    column_view child    = lcv.get_sliced_child(cudf::default_stream_value);
+    column_view child    = lcv.get_sliced_child(cudf::get_default_stream());
     bool const is_sliced = lcv.offset() > 0 || child.offset() > 0;
 
     std::string tmp =
diff --git a/cpp/tests/utilities/identify_stream_usage/CMakeLists.txt b/cpp/tests/utilities/identify_stream_usage/CMakeLists.txt
new file mode 100644
index 00000000000..89f40303550
--- /dev/null
+++ b/cpp/tests/utilities/identify_stream_usage/CMakeLists.txt
@@ -0,0 +1,60 @@
+# =============================================================================
+# Copyright (c) 2022, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+# =============================================================================
+
+cmake_minimum_required(VERSION 3.23.1 FATAL_ERROR)
+
+if(NOT EXISTS ${CMAKE_CURRENT_BINARY_DIR}/CUDF_RAPIDS.cmake)
+  file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-22.12/RAPIDS.cmake
+       ${CMAKE_CURRENT_BINARY_DIR}/CUDF_RAPIDS.cmake
+  )
+endif()
+include(${CMAKE_CURRENT_BINARY_DIR}/CUDF_RAPIDS.cmake)
+
+project(
+  IDENTIFY_STREAM_USAGE
+  VERSION 0.0.1
+  LANGUAGES CXX CUDA
+)
+
+include(rapids-cpm)
+include(${rapids-cmake-dir}/cpm/rmm.cmake)
+rapids_cpm_init()
+rapids_cpm_rmm()
+
+set(CMAKE_CUDA_RUNTIME_LIBRARY SHARED)
+add_library(identify_stream_usage SHARED identify_stream_usage.cpp)
+
+find_package(CUDAToolkit REQUIRED)
+# identify_stream_usage.cpp calls dlsym(), so also link CMAKE_DL_LIBS.
+set_target_properties(identify_stream_usage PROPERTIES CUDA_RUNTIME_LIBRARY SHARED)
+target_link_libraries(identify_stream_usage PUBLIC CUDA::cudart rmm::rmm PRIVATE ${CMAKE_DL_LIBS})
+
+set_target_properties(
+  identify_stream_usage
+  PROPERTIES # set target compile options
+             CXX_STANDARD 17
+             CXX_STANDARD_REQUIRED ON
+             POSITION_INDEPENDENT_CODE ON
+)
+
+# Add the test file.
+include(CTest)
+
+add_executable(Tests test_default_stream_identification.cu)
+add_test(NAME default_stream_identification COMMAND Tests)
+
+set_tests_properties(
+  default_stream_identification PROPERTIES ENVIRONMENT
+                                           LD_PRELOAD=$<TARGET_FILE:identify_stream_usage>
+)
diff --git a/cpp/tests/utilities/identify_stream_usage/identify_stream_usage.cpp b/cpp/tests/utilities/identify_stream_usage/identify_stream_usage.cpp
new file mode 100644
index 00000000000..4a1a8f04791
--- /dev/null
+++ b/cpp/tests/utilities/identify_stream_usage/identify_stream_usage.cpp
@@ -0,0 +1,322 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <rmm/cuda_stream.hpp>
+#include <rmm/cuda_stream_view.hpp>
+
+#include <cuda_runtime.h>
+
+#include <cxxabi.h>
+#include <dlfcn.h>
+#include <execinfo.h>
+#include <iostream>
+#include <stdexcept>
+#include <unordered_map>
+
+/**
+ * @brief Print a backtrace and raise an error if stream is a default stream.
+ */
+void check_stream_and_error(cudaStream_t stream)
+{
+  // We explicitly list the possibilities rather than using
+  // `cudf::get_default_stream().value()` for two reasons:
+  // 1. There is no guarantee that `thrust::device` and the default value of
+  //    `cudf::get_default_stream().value()` are actually the same. At present,
+  //    the former is `cudaStreamLegacy` while the latter is 0.
+  // 2. Using the cudf default stream would require linking against cudf, which
+  //    adds unnecessary complexity to the build process (especially in CI)
+  //    when this simple approach is sufficient.
+  if (stream == cudaStreamDefault || (stream == cudaStreamLegacy) ||
+      (stream == cudaStreamPerThread)) {
+#ifdef __GNUC__
+    // If we're on the wrong stream, print the stack trace from the current frame.
+    // Adapted from https://panthema.net/2008/0901-stacktrace-demangled/
+    constexpr int kMaxStackDepth = 64;
+    void* stack[kMaxStackDepth];
+    auto depth   = backtrace(stack, kMaxStackDepth);
+    auto strings = backtrace_symbols(stack, depth);
+
+    if (strings == nullptr) {
+      std::cout << "No stack trace could be found!" << std::endl;
+    } else {
+      // If we were able to extract a trace, parse it, demangle symbols, and
+      // print a readable output.
+
+      // allocate string which will be filled with the demangled function name
+      size_t funcnamesize = 256;
+      char* funcname      = (char*)malloc(funcnamesize);
+
+      // Start at frame 1 to skip check_stream_and_error itself.
+      for (int i = 1; i < depth; ++i) {
+        char* begin_name   = nullptr;
+        char* begin_offset = nullptr;
+        char* end_offset   = nullptr;
+
+        // find parentheses and +address offset surrounding the mangled name:
+        // ./module(function+0x15c) [0x8048a6d]
+        for (char* p = strings[i]; *p; ++p) {
+          if (*p == '(') {
+            begin_name = p;
+          } else if (*p == '+') {
+            begin_offset = p;
+          } else if (*p == ')' && begin_offset) {
+            end_offset = p;
+            break;
+          }
+        }
+
+        if (begin_name && begin_offset && end_offset && begin_name < begin_offset) {
+          *begin_name++   = '\0';
+          *begin_offset++ = '\0';
+          *end_offset     = '\0';
+
+          // mangled name is now in [begin_name, begin_offset) and caller offset
+          // in [begin_offset, end_offset). now apply __cxa_demangle():
+
+          int status;
+          char* ret = abi::__cxa_demangle(begin_name, funcname, &funcnamesize, &status);
+          if (status == 0) {
+            funcname =
+              ret;  // use possibly realloc()-ed string (__cxa_demangle may realloc funcname)
+            std::cout << "#" << i << " in " << strings[i] << " : " << funcname << "+"
+                      << begin_offset << std::endl;
+          } else {
+            // demangling failed. Output function name as a C function with no arguments.
+            std::cout << "#" << i << " in " << strings[i] << " : " << begin_name << "()+"
+                      << begin_offset << std::endl;
+          }
+        } else {
+          std::cout << "#" << i << " in " << strings[i] << std::endl;
+        }
+      }
+
+      free(funcname);
+    }
+    free(strings);
+#else
+    std::cout << "Backtraces are only when built with a GNU compiler." << std::endl;
+#endif  // __GNUC__
+    throw std::runtime_error("Found unexpected default stream!");
+  }
+}
+
+/**
+ * @brief Container for CUDA APIs that have been overloaded using DEFINE_OVERLOAD.
+ *
+ * This variable must be initialized before everything else.
+ *
+ * @see find_originals for a description of the priorities
+ */
+__attribute__((init_priority(1001))) std::unordered_map<std::string, void*> originals;
+
+/**
+ * @brief Macro for generating functions to override existing CUDA functions.
+ *
+ * Define a new function with the provided signature that checks the used
+ * stream and raises an exception if it is one of CUDA's default streams. If
+ * not, the new function forwards all arguments to the original function.
+ *
+ * Note that since this only defines the function, we do not need default
+ * parameter values since those will be provided by the original declarations
+ * in CUDA itself.
+ *
+ * @see find_originals for a description of the priorities
+ *
+ * @param function The function to overload.
+ * @param signature The function signature (must include names, not just types).
+ * @param arguments The function arguments (names only, no types).
+ */
+#define DEFINE_OVERLOAD(function, signature, arguments)     \
+  using function##_t = cudaError_t (*)(signature);          \
+                                                            \
+  cudaError_t function(signature)                           \
+  {                                                         \
+    check_stream_and_error(stream);                         \
+    return ((function##_t)originals[#function])(arguments); \
+  }                                                         \
+  __attribute__((constructor(1002))) void queue_##function() { originals[#function] = nullptr; }
+
+/**
+ * @brief Helper macro to define macro arguments that contain a comma.
+ */
+#define ARG(...) __VA_ARGS__
+
+// clang-format off
+/*
+   We need to overload all the functions from the runtime API (assuming that we
+   don't use the driver API) that accept streams. The main webpage for APIs is
+   https://docs.nvidia.com/cuda/cuda-runtime-api/modules.html#modules. Here are
+   the modules containing any APIs using streams as of 9/20/2022:
+   - https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html
+   - https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__EVENT.html#group__CUDART__EVENT - Done
+   - https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__EXTRES__INTEROP.html#group__CUDART__EXTRES__INTEROP
+   - https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__EXECUTION.html#group__CUDART__EXECUTION - Done
+   - https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY - Done
+   - https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY__POOLS.html#group__CUDART__MEMORY__POOLS - Done
+   - https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__OPENGL__DEPRECATED.html#group__CUDART__OPENGL__DEPRECATED
+   - https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__EGL.html#group__CUDART__EGL
+   - https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__INTEROP.html#group__CUDART__INTEROP
+   - https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__GRAPH.html#group__CUDART__GRAPH
+   - https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__HIGHLEVEL.html#group__CUDART__HIGHLEVEL
+ */
+// clang-format on
+
+// Event APIS:
+// https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__EVENT.html#group__CUDART__EVENT
+DEFINE_OVERLOAD(cudaEventRecord, ARG(cudaEvent_t event, cudaStream_t stream), ARG(event, stream));
+
+DEFINE_OVERLOAD(cudaEventRecordWithFlags,
+                ARG(cudaEvent_t event, cudaStream_t stream, unsigned int flags),
+                ARG(event, stream, flags));
+
+// Execution APIS:
+// https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__EXECUTION.html#group__CUDART__EXECUTION
+DEFINE_OVERLOAD(cudaLaunchKernel,
+                ARG(const void* func,
+                    dim3 gridDim,
+                    dim3 blockDim,
+                    void** args,
+                    size_t sharedMem,
+                    cudaStream_t stream),
+                ARG(func, gridDim, blockDim, args, sharedMem, stream));
+DEFINE_OVERLOAD(cudaLaunchCooperativeKernel,
+                ARG(const void* func,
+                    dim3 gridDim,
+                    dim3 blockDim,
+                    void** args,
+                    size_t sharedMem,
+                    cudaStream_t stream),
+                ARG(func, gridDim, blockDim, args, sharedMem, stream));
+DEFINE_OVERLOAD(cudaLaunchHostFunc,
+                ARG(cudaStream_t stream, cudaHostFn_t fn, void* userData),
+                ARG(stream, fn, userData));
+
+// Memory transfer APIS:
+// https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY
+DEFINE_OVERLOAD(cudaMemPrefetchAsync,
+                ARG(const void* devPtr, size_t count, int dstDevice, cudaStream_t stream),
+                ARG(devPtr, count, dstDevice, stream));
+DEFINE_OVERLOAD(cudaMemcpy2DAsync,
+                ARG(void* dst,
+                    size_t dpitch,
+                    const void* src,
+                    size_t spitch,
+                    size_t width,
+                    size_t height,
+                    cudaMemcpyKind kind,
+                    cudaStream_t stream),
+                ARG(dst, dpitch, src, spitch, width, height, kind, stream));
+DEFINE_OVERLOAD(cudaMemcpy2DFromArrayAsync,
+                ARG(void* dst,
+                    size_t dpitch,
+                    cudaArray_const_t src,
+                    size_t wOffset,
+                    size_t hOffset,
+                    size_t width,
+                    size_t height,
+                    cudaMemcpyKind kind,
+                    cudaStream_t stream),
+                ARG(dst, dpitch, src, wOffset, hOffset, width, height, kind, stream));
+DEFINE_OVERLOAD(cudaMemcpy2DToArrayAsync,
+                ARG(cudaArray_t dst,
+                    size_t wOffset,
+                    size_t hOffset,
+                    const void* src,
+                    size_t spitch,
+                    size_t width,
+                    size_t height,
+                    cudaMemcpyKind kind,
+                    cudaStream_t stream),
+                ARG(dst, wOffset, hOffset, src, spitch, width, height, kind, stream));
+DEFINE_OVERLOAD(cudaMemcpy3DAsync,
+                ARG(const cudaMemcpy3DParms* p, cudaStream_t stream),
+                ARG(p, stream));
+DEFINE_OVERLOAD(cudaMemcpy3DPeerAsync,
+                ARG(const cudaMemcpy3DPeerParms* p, cudaStream_t stream),
+                ARG(p, stream));
+DEFINE_OVERLOAD(
+  cudaMemcpyAsync,
+  ARG(void* dst, const void* src, size_t count, cudaMemcpyKind kind, cudaStream_t stream),
+  ARG(dst, src, count, kind, stream));
+DEFINE_OVERLOAD(cudaMemcpyFromSymbolAsync,
+                ARG(void* dst,
+                    const void* symbol,
+                    size_t count,
+                    size_t offset,
+                    cudaMemcpyKind kind,
+                    cudaStream_t stream),
+                ARG(dst, symbol, count, offset, kind, stream));
+DEFINE_OVERLOAD(cudaMemcpyToSymbolAsync,
+                ARG(const void* symbol,
+                    const void* src,
+                    size_t count,
+                    size_t offset,
+                    cudaMemcpyKind kind,
+                    cudaStream_t stream),
+                ARG(symbol, src, count, offset, kind, stream));
+DEFINE_OVERLOAD(
+  cudaMemset2DAsync,
+  ARG(void* devPtr, size_t pitch, int value, size_t width, size_t height, cudaStream_t stream),
+  ARG(devPtr, pitch, value, width, height, stream));
+DEFINE_OVERLOAD(
+  cudaMemset3DAsync,
+  ARG(cudaPitchedPtr pitchedDevPtr, int value, cudaExtent extent, cudaStream_t stream),
+  ARG(pitchedDevPtr, value, extent, stream));
+DEFINE_OVERLOAD(cudaMemsetAsync,
+                ARG(void* devPtr, int value, size_t count, cudaStream_t stream),
+                ARG(devPtr, value, count, stream));
+
+// Memory allocation APIS:
+// https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY__POOLS.html#group__CUDART__MEMORY__POOLS
+DEFINE_OVERLOAD(cudaFreeAsync, ARG(void* devPtr, cudaStream_t stream), ARG(devPtr, stream));
+DEFINE_OVERLOAD(cudaMallocAsync,
+                ARG(void** devPtr, size_t size, cudaStream_t stream),
+                ARG(devPtr, size, stream));
+DEFINE_OVERLOAD(cudaMallocFromPoolAsync,
+                ARG(void** ptr, size_t size, cudaMemPool_t memPool, cudaStream_t stream),
+                ARG(ptr, size, memPool, stream));
+
+namespace cudf {
+
+/**
+ * @brief Get the current default stream
+ *
+ * Overload the default function to return a new stream here.
+ *
+ * @return The current default stream.
+ */
+rmm::cuda_stream_view const get_default_stream()
+{
+  static rmm::cuda_stream stream{};
+  return {stream};
+}
+
+}  // namespace cudf
+
+/**
+ * @brief Function to collect all the original CUDA symbols corresponding to overloaded functions.
+ *
+ * Note on priorities:
+ * - `originals` must be initialized first, so it is 1001.
+ * - The function names must be added to originals next in the macro, so those are 1002.
+ * - Finally, this function actually finds the original symbols so it is 1003.
+ */
+__attribute__((constructor(1003))) void find_originals()
+{
+  for (auto& entry : originals) {  // by reference: update each entry in place, no copies
+    entry.second = dlsym(RTLD_NEXT, entry.first.c_str());
+  }
+}
diff --git a/cpp/tests/utilities/identify_stream_usage/test_default_stream_identification.cu b/cpp/tests/utilities/identify_stream_usage/test_default_stream_identification.cu
new file mode 100644
index 00000000000..022244b148b
--- /dev/null
+++ b/cpp/tests/utilities/identify_stream_usage/test_default_stream_identification.cu
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdexcept>
+
+__global__ void kernel() { printf("The kernel ran!\n"); }
+
+void test_cudaLaunchKernel()
+{
+  cudaStream_t stream;
+  cudaStreamCreate(&stream);
+  kernel<<<1, 1, 0, stream>>>();
+  cudaError_t err{cudaDeviceSynchronize()};
+  if (err != cudaSuccess) { throw std::runtime_error("Kernel failed on non-default stream!"); }
+  err = cudaGetLastError();
+  if (err != cudaSuccess) { throw std::runtime_error("Kernel failed on non-default stream!"); }
+  // The default-stream launch below should throw when identify_stream_usage is LD_PRELOADed.
+  try {
+    kernel<<<1, 1>>>();
+  } catch (std::runtime_error const&) {
+    return;
+  }
+  throw std::runtime_error("No exception raised for kernel on default stream!");
+}
+
+int main() { test_cudaLaunchKernel(); }
diff --git a/cpp/tests/utilities_tests/span_tests.cu b/cpp/tests/utilities_tests/span_tests.cu
index cccef4b6284..a043e723eda 100644
--- a/cpp/tests/utilities_tests/span_tests.cu
+++ b/cpp/tests/utilities_tests/span_tests.cu
@@ -212,11 +212,14 @@ TEST(SpanTest, CanConstructFromHostContainers)
   (void)host_span<int const>(h_vector_c);
 }
 
+// This test is the only place in libcudf's test suite where using a
+// thrust::device_vector (and therefore the CUDA default stream) is acceptable
+// since we are explicitly testing conversions from thrust::device_vector.
 TEST(SpanTest, CanConstructFromDeviceContainers)
 {
   auto d_thrust_vector = thrust::device_vector<int>(1);
   auto d_vector        = rmm::device_vector<int>(1);
-  auto d_uvector       = rmm::device_uvector<int>(1, cudf::default_stream_value);
+  auto d_uvector       = rmm::device_uvector<int>(1, cudf::get_default_stream());
 
   (void)device_span<int>(d_thrust_vector);
   (void)device_span<int>(d_vector);
@@ -236,13 +239,13 @@ __global__ void simple_device_kernel(device_span<bool> result) { result[0] = tru
 TEST(SpanTest, CanUseDeviceSpan)
 {
   auto d_message =
-    cudf::detail::make_zeroed_device_uvector_async<bool>(1, cudf::default_stream_value);
+    cudf::detail::make_zeroed_device_uvector_async<bool>(1, cudf::get_default_stream());
 
   auto d_span = device_span<bool>(d_message.data(), d_message.size());
 
-  simple_device_kernel<<<1, 1, 0, cudf::default_stream_value.value()>>>(d_span);
+  simple_device_kernel<<<1, 1, 0, cudf::get_default_stream().value()>>>(d_span);
 
-  ASSERT_TRUE(d_message.element(0, cudf::default_stream_value));
+  ASSERT_TRUE(d_message.element(0, cudf::get_default_stream()));
 }
 
 class MdSpanTest : public cudf::test::BaseFixture {
@@ -250,9 +253,9 @@ class MdSpanTest : public cudf::test::BaseFixture {
 
 TEST(MdSpanTest, CanDetermineEmptiness)
 {
-  auto const vector            = hostdevice_2dvector<int>(1, 2, cudf::default_stream_value);
-  auto const no_rows_vector    = hostdevice_2dvector<int>(0, 2, cudf::default_stream_value);
-  auto const no_columns_vector = hostdevice_2dvector<int>(1, 0, cudf::default_stream_value);
+  auto const vector            = hostdevice_2dvector<int>(1, 2, cudf::get_default_stream());
+  auto const no_rows_vector    = hostdevice_2dvector<int>(0, 2, cudf::get_default_stream());
+  auto const no_columns_vector = hostdevice_2dvector<int>(1, 0, cudf::get_default_stream());
 
   EXPECT_FALSE(host_2dspan<int const>{vector}.is_empty());
   EXPECT_FALSE(device_2dspan<int const>{vector}.is_empty());
@@ -273,17 +276,17 @@ __global__ void readwrite_kernel(device_2dspan<int> result)
 
 TEST(MdSpanTest, DeviceReadWrite)
 {
-  auto vector = hostdevice_2dvector<int>(11, 23, cudf::default_stream_value);
+  auto vector = hostdevice_2dvector<int>(11, 23, cudf::get_default_stream());
 
-  readwrite_kernel<<<1, 1, 0, cudf::default_stream_value.value()>>>(vector);
-  readwrite_kernel<<<1, 1, 0, cudf::default_stream_value.value()>>>(vector);
-  vector.device_to_host(cudf::default_stream_value, true);
+  readwrite_kernel<<<1, 1, 0, cudf::get_default_stream().value()>>>(vector);
+  readwrite_kernel<<<1, 1, 0, cudf::get_default_stream().value()>>>(vector);
+  vector.device_to_host(cudf::get_default_stream(), true);
   EXPECT_EQ(vector[5][6], 30);
 }
 
 TEST(MdSpanTest, HostReadWrite)
 {
-  auto vector = hostdevice_2dvector<int>(11, 23, cudf::default_stream_value);
+  auto vector = hostdevice_2dvector<int>(11, 23, cudf::get_default_stream());
   auto span   = host_2dspan<int>{vector};
   span[5][6]  = 5;
   if (span[5][6] == 5) { span[5][6] *= 6; }
@@ -293,7 +296,7 @@ TEST(MdSpanTest, HostReadWrite)
 
 TEST(MdSpanTest, CanGetSize)
 {
-  auto const vector = hostdevice_2dvector<int>(1, 2, cudf::default_stream_value);
+  auto const vector = hostdevice_2dvector<int>(1, 2, cudf::get_default_stream());
 
   EXPECT_EQ(host_2dspan<int const>{vector}.size(), vector.size());
   EXPECT_EQ(device_2dspan<int const>{vector}.size(), vector.size());
@@ -301,7 +304,7 @@ TEST(MdSpanTest, CanGetSize)
 
 TEST(MdSpanTest, CanGetCount)
 {
-  auto const vector = hostdevice_2dvector<int>(11, 23, cudf::default_stream_value);
+  auto const vector = hostdevice_2dvector<int>(11, 23, cudf::get_default_stream());
 
   EXPECT_EQ(host_2dspan<int const>{vector}.count(), 11ul * 23);
   EXPECT_EQ(device_2dspan<int const>{vector}.count(), 11ul * 23);
diff --git a/cpp/tests/wrappers/timestamps_test.cu b/cpp/tests/wrappers/timestamps_test.cu
index 73bfd15744a..9aad90788e0 100644
--- a/cpp/tests/wrappers/timestamps_test.cu
+++ b/cpp/tests/wrappers/timestamps_test.cu
@@ -38,7 +38,7 @@
 
 template <typename T>
 struct ChronoColumnTest : public cudf::test::BaseFixture {
-  rmm::cuda_stream_view stream() { return cudf::default_stream_value; }
+  rmm::cuda_stream_view stream() { return cudf::get_default_stream(); }
   cudf::size_type size() { return cudf::size_type(100); }
   cudf::data_type type() { return cudf::data_type{cudf::type_to_id<T>()}; }
 };
@@ -93,9 +93,9 @@ TYPED_TEST(ChronoColumnTest, ChronoDurationsMatchPrimitiveRepresentation)
   auto primitive_col =
     fixed_width_column_wrapper<Rep>(chrono_col_data.begin(), chrono_col_data.end());
 
-  rmm::device_uvector<int32_t> indices(this->size(), cudf::default_stream_value);
-  thrust::sequence(rmm::exec_policy(cudf::default_stream_value), indices.begin(), indices.end());
-  EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::default_stream_value),
+  rmm::device_uvector<int32_t> indices(this->size(), cudf::get_default_stream());
+  thrust::sequence(rmm::exec_policy(cudf::get_default_stream()), indices.begin(), indices.end());
+  EXPECT_TRUE(thrust::all_of(rmm::exec_policy(cudf::get_default_stream()),
                              indices.begin(),
                              indices.end(),
                              compare_chrono_elements_to_primitive_representation<T>{
@@ -147,11 +147,11 @@ TYPED_TEST(ChronoColumnTest, ChronosCanBeComparedInDeviceCode)
   auto chrono_rhs_col =
     generate_timestamps<T>(this->size(), time_point_ms(start_rhs), time_point_ms(stop_rhs));
 
-  rmm::device_uvector<int32_t> indices(this->size(), cudf::default_stream_value);
-  thrust::sequence(rmm::exec_policy(cudf::default_stream_value), indices.begin(), indices.end());
+  rmm::device_uvector<int32_t> indices(this->size(), cudf::get_default_stream());
+  thrust::sequence(rmm::exec_policy(cudf::get_default_stream()), indices.begin(), indices.end());
 
   EXPECT_TRUE(thrust::all_of(
-    rmm::exec_policy(cudf::default_stream_value),
+    rmm::exec_policy(cudf::get_default_stream()),
     indices.begin(),
     indices.end(),
     compare_chrono_elements<TypeParam>{cudf::binary_operator::LESS,
@@ -159,7 +159,7 @@ TYPED_TEST(ChronoColumnTest, ChronosCanBeComparedInDeviceCode)
                                        *cudf::column_device_view::create(chrono_rhs_col)}));
 
   EXPECT_TRUE(thrust::all_of(
-    rmm::exec_policy(cudf::default_stream_value),
+    rmm::exec_policy(cudf::get_default_stream()),
     indices.begin(),
     indices.end(),
     compare_chrono_elements<TypeParam>{cudf::binary_operator::GREATER,
@@ -167,7 +167,7 @@ TYPED_TEST(ChronoColumnTest, ChronosCanBeComparedInDeviceCode)
                                        *cudf::column_device_view::create(chrono_lhs_col)}));
 
   EXPECT_TRUE(thrust::all_of(
-    rmm::exec_policy(cudf::default_stream_value),
+    rmm::exec_policy(cudf::get_default_stream()),
     indices.begin(),
     indices.end(),
     compare_chrono_elements<TypeParam>{cudf::binary_operator::LESS_EQUAL,
@@ -175,7 +175,7 @@ TYPED_TEST(ChronoColumnTest, ChronosCanBeComparedInDeviceCode)
                                        *cudf::column_device_view::create(chrono_lhs_col)}));
 
   EXPECT_TRUE(thrust::all_of(
-    rmm::exec_policy(cudf::default_stream_value),
+    rmm::exec_policy(cudf::get_default_stream()),
     indices.begin(),
     indices.end(),
     compare_chrono_elements<TypeParam>{cudf::binary_operator::GREATER_EQUAL,
diff --git a/java/src/main/native/include/maps_column_view.hpp b/java/src/main/native/include/maps_column_view.hpp
index b9b60f4e3b2..5ac8d5c5713 100644
--- a/java/src/main/native/include/maps_column_view.hpp
+++ b/java/src/main/native/include/maps_column_view.hpp
@@ -38,7 +38,7 @@ namespace jni {
 class maps_column_view {
 public:
   maps_column_view(lists_column_view const &lists_of_structs,
-                   rmm::cuda_stream_view stream = cudf::default_stream_value);
+                   rmm::cuda_stream_view stream = cudf::get_default_stream());
 
   // Rule of 5.
   maps_column_view(maps_column_view const &maps_view) = default;
@@ -82,7 +82,7 @@ class maps_column_view {
    * @return std::unique_ptr<column> Column of values corresponding the value of the lookup key.
    */
   std::unique_ptr<column> get_values_for(
-      column_view const &keys, rmm::cuda_stream_view stream = cudf::default_stream_value,
+      column_view const &keys, rmm::cuda_stream_view stream = cudf::get_default_stream(),
       rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) const;
 
   /**
@@ -100,7 +100,7 @@ class maps_column_view {
    * @return std::unique_ptr<column>
    */
   std::unique_ptr<column> get_values_for(
-      scalar const &key, rmm::cuda_stream_view stream = cudf::default_stream_value,
+      scalar const &key, rmm::cuda_stream_view stream = cudf::get_default_stream(),
       rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) const;
 
   /**
@@ -120,7 +120,7 @@ class maps_column_view {
    * @return std::unique_ptr<column>
    */
   std::unique_ptr<column>
-  contains(scalar const &key, rmm::cuda_stream_view stream = cudf::default_stream_value,
+  contains(scalar const &key, rmm::cuda_stream_view stream = cudf::get_default_stream(),
            rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) const;
 
   /**
@@ -141,7 +141,7 @@ class maps_column_view {
    */
 
   std::unique_ptr<column>
-  contains(column_view const &key, rmm::cuda_stream_view stream = cudf::default_stream_value,
+  contains(column_view const &key, rmm::cuda_stream_view stream = cudf::get_default_stream(),
            rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) const;
 
 private:
diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp
index f16ead009a8..979c1f9f772 100644
--- a/java/src/main/native/src/ColumnViewJni.cpp
+++ b/java/src/main/native/src/ColumnViewJni.cpp
@@ -486,7 +486,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_dropListDuplicatesWithKey
                   "Input column has child that does not have 2 children.", 0);
 
     return release_as_jlong(
-        cudf::jni::lists_distinct_by_key(lists_keys_vals, cudf::default_stream_value));
+        cudf::jni::lists_distinct_by_key(lists_keys_vals, cudf::get_default_stream()));
   }
   CATCH_STD(env, 0);
 }
diff --git a/java/src/main/native/src/ColumnViewJni.hpp b/java/src/main/native/src/ColumnViewJni.hpp
index 2cbdb65653e..29158cbd98f 100644
--- a/java/src/main/native/src/ColumnViewJni.hpp
+++ b/java/src/main/native/src/ColumnViewJni.hpp
@@ -51,7 +51,7 @@ new_column_with_boolean_column_as_validity(cudf::column_view const &exemplar,
  */
 std::unique_ptr<cudf::column>
 generate_list_offsets(cudf::column_view const &list_length,
-                      rmm::cuda_stream_view stream = cudf::default_stream_value);
+                      rmm::cuda_stream_view stream = cudf::get_default_stream());
 
 /**
  * @brief Perform a special treatment for the results of `cudf::lists::have_overlap` to produce the
@@ -73,7 +73,7 @@ generate_list_offsets(cudf::column_view const &list_length,
  */
 void post_process_list_overlap(cudf::column_view const &lhs, cudf::column_view const &rhs,
                                std::unique_ptr<cudf::column> const &overlap_result,
-                               rmm::cuda_stream_view stream = cudf::default_stream_value);
+                               rmm::cuda_stream_view stream = cudf::get_default_stream());
 
 /**
  * @brief Generates lists column by copying elements that are distinct by key from each input list
diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp
index cbd0aee335e..b70a7b5a615 100644
--- a/java/src/main/native/src/TableJni.cpp
+++ b/java/src/main/native/src/TableJni.cpp
@@ -3467,7 +3467,7 @@ JNIEXPORT jobject JNICALL Java_ai_rapids_cudf_Table_contiguousSplitGroups(
       auto const size = cudf::distance(begin, end);
       auto const vec = thrust::host_vector<cudf::size_type>(begin, end);
       auto buf = rmm::device_buffer{vec.data(), size * sizeof(cudf::size_type),
-                                    cudf::default_stream_value};
+                                    cudf::get_default_stream()};
       auto gather_map_col = std::make_unique<cudf::column>(cudf::data_type{cudf::type_id::INT32},
                                                            size, std::move(buf));
 
diff --git a/java/src/main/native/src/aggregation128_utils.hpp b/java/src/main/native/src/aggregation128_utils.hpp
index 70658976dad..a1437606cdf 100644
--- a/java/src/main/native/src/aggregation128_utils.hpp
+++ b/java/src/main/native/src/aggregation128_utils.hpp
@@ -41,7 +41,7 @@ namespace cudf::jni {
  */
 std::unique_ptr<cudf::column>
 extract_chunk32(cudf::column_view const &col, cudf::data_type dtype, int chunk_idx,
-                rmm::cuda_stream_view stream = cudf::default_stream_value);
+                rmm::cuda_stream_view stream = cudf::get_default_stream());
 
 /**
  * @brief Reassemble a 128-bit column from four 64-bit integer columns with overflow detection.
@@ -65,6 +65,6 @@ extract_chunk32(cudf::column_view const &col, cudf::data_type dtype, int chunk_i
  */
 std::unique_ptr<cudf::table>
 assemble128_from_sum(cudf::table_view const &chunks_table, cudf::data_type output_type,
-                     rmm::cuda_stream_view stream = cudf::default_stream_value);
+                     rmm::cuda_stream_view stream = cudf::get_default_stream());
 
 } // namespace cudf::jni
diff --git a/java/src/main/native/src/row_conversion.cu b/java/src/main/native/src/row_conversion.cu
index 578915ee2ce..3913de720f9 100644
--- a/java/src/main/native/src/row_conversion.cu
+++ b/java/src/main/native/src/row_conversion.cu
@@ -1885,7 +1885,7 @@ std::vector<std::unique_ptr<column>> convert_to_rows(
 
                    return make_lists_column(
                        batch_info.row_batches[batch].row_count, std::move(offsets), std::move(data),
-                       0, rmm::device_buffer{0, cudf::default_stream_value, mr}, stream, mr);
+                       0, rmm::device_buffer{0, cudf::get_default_stream(), mr}, stream, mr);
                  });
 
   return ret;
diff --git a/java/src/main/native/src/row_conversion.hpp b/java/src/main/native/src/row_conversion.hpp
index e260ea44089..e4631875152 100644
--- a/java/src/main/native/src/row_conversion.hpp
+++ b/java/src/main/native/src/row_conversion.hpp
@@ -29,23 +29,23 @@ namespace jni {
 std::vector<std::unique_ptr<cudf::column>> convert_to_rows_fixed_width_optimized(
     cudf::table_view const &tbl,
     // TODO need something for validity
-    rmm::cuda_stream_view stream = cudf::default_stream_value,
+    rmm::cuda_stream_view stream = cudf::get_default_stream(),
     rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource());
 
 std::vector<std::unique_ptr<cudf::column>>
 convert_to_rows(cudf::table_view const &tbl,
                 // TODO need something for validity
-                rmm::cuda_stream_view stream = cudf::default_stream_value,
+                rmm::cuda_stream_view stream = cudf::get_default_stream(),
                 rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource());
 
 std::unique_ptr<cudf::table> convert_from_rows_fixed_width_optimized(
     cudf::lists_column_view const &input, std::vector<cudf::data_type> const &schema,
-    rmm::cuda_stream_view stream = cudf::default_stream_value,
+    rmm::cuda_stream_view stream = cudf::get_default_stream(),
     rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource());
 
 std::unique_ptr<cudf::table>
 convert_from_rows(cudf::lists_column_view const &input, std::vector<cudf::data_type> const &schema,
-                  rmm::cuda_stream_view stream = cudf::default_stream_value,
+                  rmm::cuda_stream_view stream = cudf::get_default_stream(),
                   rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource());
 
 } // namespace jni
diff --git a/python/strings_udf/cpp/src/strings/udf/udf_apis.cu b/python/strings_udf/cpp/src/strings/udf/udf_apis.cu
index dfef1be39f5..89952dadb6c 100644
--- a/python/strings_udf/cpp/src/strings/udf/udf_apis.cu
+++ b/python/strings_udf/cpp/src/strings/udf/udf_apis.cu
@@ -40,7 +40,7 @@ std::unique_ptr<rmm::device_buffer> to_string_view_array(cudf::column_view const
 
 std::unique_ptr<rmm::device_buffer> to_string_view_array(cudf::column_view const input)
 {
-  return detail::to_string_view_array(input, rmm::cuda_stream_default);
+  return detail::to_string_view_array(input, cudf::get_default_stream());
 }
 
 }  // namespace udf

From 9c06330363db4da99803a3728b8bf44f9829f0b9 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 21 Oct 2022 08:23:14 -0700
Subject: [PATCH 058/202] Accept const refs instead of const unique_ptr refs in
 reduce and scan APIs. (#11960)

There is almost never a good reason to pass arguments as `unique_ptr<T> const&`. Because such an argument cannot be modified, its only possible use is to access the underlying object. A function communicates that intent better, and is more flexible for its callers, when it accepts the underlying pointer or reference directly instead.

Resolves #10393

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/11960
---
 cpp/benchmarks/reduction/anyall.cpp           |   2 +-
 cpp/benchmarks/reduction/dictionary.cpp       |   2 +-
 cpp/benchmarks/reduction/reduce.cpp           |   2 +-
 cpp/benchmarks/reduction/scan.cpp             |   2 +-
 cpp/include/cudf/detail/scan.hpp              |   8 +-
 cpp/include/cudf/reduction.hpp                |   6 +-
 cpp/src/reductions/reductions.cpp             |  63 +-
 cpp/src/reductions/scan/scan.cpp              |   8 +-
 cpp/src/reductions/scan/scan.cuh              |   4 +-
 cpp/src/reductions/scan/scan_exclusive.cu     |   2 +-
 cpp/src/reductions/scan/scan_inclusive.cu     |   2 +-
 cpp/tests/quantiles/percentile_approx_test.cu |   2 +-
 cpp/tests/reductions/collect_ops_tests.cpp    |  22 +-
 cpp/tests/reductions/list_rank_test.cpp       |  16 +-
 cpp/tests/reductions/rank_tests.cpp           |  62 +-
 cpp/tests/reductions/reduction_tests.cpp      | 565 +++++++++---------
 cpp/tests/reductions/scan_tests.cpp           | 155 +++--
 cpp/tests/reductions/tdigest_tests.cu         |   6 +-
 java/src/main/native/src/ColumnViewJni.cpp    |  10 +-
 python/cudf/cudf/_lib/cpp/reduce.pxd          |   4 +-
 python/cudf/cudf/_lib/reduce.pyx              |   6 +-
 21 files changed, 478 insertions(+), 471 deletions(-)

diff --git a/cpp/benchmarks/reduction/anyall.cpp b/cpp/benchmarks/reduction/anyall.cpp
index 80a85b0f217..755fa1ca2ad 100644
--- a/cpp/benchmarks/reduction/anyall.cpp
+++ b/cpp/benchmarks/reduction/anyall.cpp
@@ -41,7 +41,7 @@ void BM_reduction_anyall(benchmark::State& state,
 
   for (auto _ : state) {
     cuda_event_timer timer(state, true);
-    auto result = cudf::reduce(*values, agg, output_dtype);
+    auto result = cudf::reduce(*values, *agg, output_dtype);
   }
 }
 
diff --git a/cpp/benchmarks/reduction/dictionary.cpp b/cpp/benchmarks/reduction/dictionary.cpp
index 219564d6b5c..8f2f0be33ca 100644
--- a/cpp/benchmarks/reduction/dictionary.cpp
+++ b/cpp/benchmarks/reduction/dictionary.cpp
@@ -51,7 +51,7 @@ void BM_reduction_dictionary(benchmark::State& state,
 
   for (auto _ : state) {
     cuda_event_timer timer(state, true);
-    auto result = cudf::reduce(*values, agg, output_dtype);
+    auto result = cudf::reduce(*values, *agg, output_dtype);
   }
 }
 
diff --git a/cpp/benchmarks/reduction/reduce.cpp b/cpp/benchmarks/reduction/reduce.cpp
index 4e354352c11..4dfa7f0bbdc 100644
--- a/cpp/benchmarks/reduction/reduce.cpp
+++ b/cpp/benchmarks/reduction/reduce.cpp
@@ -45,7 +45,7 @@ void BM_reduction(benchmark::State& state, std::unique_ptr<cudf::reduce_aggregat
 
   for (auto _ : state) {
     cuda_event_timer timer(state, true);
-    auto result = cudf::reduce(*input_column, agg, output_dtype);
+    auto result = cudf::reduce(*input_column, *agg, output_dtype);
   }
 }
 
diff --git a/cpp/benchmarks/reduction/scan.cpp b/cpp/benchmarks/reduction/scan.cpp
index 354333ea411..592eed1210a 100644
--- a/cpp/benchmarks/reduction/scan.cpp
+++ b/cpp/benchmarks/reduction/scan.cpp
@@ -38,7 +38,7 @@ static void BM_reduction_scan(benchmark::State& state, bool include_nulls)
   for (auto _ : state) {
     cuda_event_timer timer(state, true);
     auto result = cudf::scan(
-      *column, cudf::make_min_aggregation<cudf::scan_aggregation>(), cudf::scan_type::INCLUSIVE);
+      *column, *cudf::make_min_aggregation<cudf::scan_aggregation>(), cudf::scan_type::INCLUSIVE);
   }
 }
 
diff --git a/cpp/include/cudf/detail/scan.hpp b/cpp/include/cudf/detail/scan.hpp
index 13dddd3b0c8..f4b2d51d0cb 100644
--- a/cpp/include/cudf/detail/scan.hpp
+++ b/cpp/include/cudf/detail/scan.hpp
@@ -38,7 +38,7 @@ namespace detail {
  *                           `agg` is not Min or Max.
  *
  * @param input The input column view for the scan.
- * @param agg unique_ptr to aggregation operator applied by the scan.
+ * @param agg Aggregation operator applied by the scan
  * @param null_handling Exclude null values when computing the result if null_policy::EXCLUDE.
  *                      Include nulls if null_policy::INCLUDE. Any operation with a null results in
  *                      a null.
@@ -47,7 +47,7 @@ namespace detail {
  * @returns Column with scan results.
  */
 std::unique_ptr<column> scan_exclusive(column_view const& input,
-                                       std::unique_ptr<scan_aggregation> const& agg,
+                                       scan_aggregation const& agg,
                                        null_policy null_handling,
                                        rmm::cuda_stream_view stream,
                                        rmm::mr::device_memory_resource* mr);
@@ -64,7 +64,7 @@ std::unique_ptr<column> scan_exclusive(column_view const& input,
  *                           but the `agg` is not Min or Max.
  *
  * @param input The input column view for the scan.
- * @param agg unique_ptr to aggregation operator applied by the scan.
+ * @param agg Aggregation operator applied by the scan
  * @param null_handling Exclude null values when computing the result if null_policy::EXCLUDE.
  *                      Include nulls if null_policy::INCLUDE. Any operation with a null results in
  *                      a null.
@@ -73,7 +73,7 @@ std::unique_ptr<column> scan_exclusive(column_view const& input,
  * @returns Column with scan results.
  */
 std::unique_ptr<column> scan_inclusive(column_view const& input,
-                                       std::unique_ptr<scan_aggregation> const& agg,
+                                       scan_aggregation const& agg,
                                        null_policy null_handling,
                                        rmm::cuda_stream_view stream,
                                        rmm::mr::device_memory_resource* mr);
diff --git a/cpp/include/cudf/reduction.hpp b/cpp/include/cudf/reduction.hpp
index 083892aa856..7aa7ada6896 100644
--- a/cpp/include/cudf/reduction.hpp
+++ b/cpp/include/cudf/reduction.hpp
@@ -72,7 +72,7 @@ enum class scan_type : bool { INCLUSIVE, EXCLUSIVE };
  */
 std::unique_ptr<scalar> reduce(
   column_view const& col,
-  std::unique_ptr<reduce_aggregation> const& agg,
+  reduce_aggregation const& agg,
   data_type output_dtype,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
@@ -89,7 +89,7 @@ std::unique_ptr<scalar> reduce(
  */
 std::unique_ptr<scalar> reduce(
   column_view const& col,
-  std::unique_ptr<reduce_aggregation> const& agg,
+  reduce_aggregation const& agg,
   data_type output_dtype,
   std::optional<std::reference_wrapper<scalar const>> init,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
@@ -187,7 +187,7 @@ std::unique_ptr<column> segmented_reduce(
  */
 std::unique_ptr<column> scan(
   const column_view& input,
-  std::unique_ptr<scan_aggregation> const& agg,
+  scan_aggregation const& agg,
   scan_type inclusive,
   null_policy null_handling           = null_policy::EXCLUDE,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp
index 4166becbf4d..a7d7e14a193 100644
--- a/cpp/src/reductions/reductions.cpp
+++ b/cpp/src/reductions/reductions.cpp
@@ -49,7 +49,7 @@ struct reduce_dispatch_functor {
   }
 
   template <aggregation::Kind k>
-  std::unique_ptr<scalar> operator()(std::unique_ptr<reduce_aggregation> const& agg)
+  std::unique_ptr<scalar> operator()(reduce_aggregation const& agg)
   {
     switch (k) {
       case aggregation::SUM: return reduction::sum(col, output_dtype, init, stream, mr);
@@ -62,12 +62,12 @@ struct reduce_dispatch_functor {
         return reduction::sum_of_squares(col, output_dtype, stream, mr);
       case aggregation::MEAN: return reduction::mean(col, output_dtype, stream, mr);
       case aggregation::VARIANCE: {
-        auto var_agg = dynamic_cast<var_aggregation const*>(agg.get());
-        return reduction::variance(col, output_dtype, var_agg->_ddof, stream, mr);
+        auto var_agg = static_cast<var_aggregation const&>(agg);
+        return reduction::variance(col, output_dtype, var_agg._ddof, stream, mr);
       }
       case aggregation::STD: {
-        auto var_agg = dynamic_cast<std_aggregation const*>(agg.get());
-        return reduction::standard_deviation(col, output_dtype, var_agg->_ddof, stream, mr);
+        auto var_agg = static_cast<std_aggregation const&>(agg);
+        return reduction::standard_deviation(col, output_dtype, var_agg._ddof, stream, mr);
       }
       case aggregation::MEDIAN: {
         auto sorted_indices = sorted_order(table_view{{col}}, {}, {null_order::AFTER}, stream);
@@ -78,60 +78,59 @@ struct reduce_dispatch_functor {
         return get_element(*col_ptr, 0, stream, mr);
       }
       case aggregation::QUANTILE: {
-        auto quantile_agg = dynamic_cast<quantile_aggregation const*>(agg.get());
-        CUDF_EXPECTS(quantile_agg->_quantiles.size() == 1,
+        auto quantile_agg = static_cast<quantile_aggregation const&>(agg);
+        CUDF_EXPECTS(quantile_agg._quantiles.size() == 1,
                      "Reduction quantile accepts only one quantile value");
         auto sorted_indices = sorted_order(table_view{{col}}, {}, {null_order::AFTER}, stream);
         auto valid_sorted_indices =
           split(*sorted_indices, {col.size() - col.null_count()}, stream)[0];
 
         auto col_ptr = quantile(col,
-                                quantile_agg->_quantiles,
-                                quantile_agg->_interpolation,
+                                quantile_agg._quantiles,
+                                quantile_agg._interpolation,
                                 valid_sorted_indices,
                                 true,
                                 stream);
         return get_element(*col_ptr, 0, stream, mr);
       }
       case aggregation::NUNIQUE: {
-        auto nunique_agg = dynamic_cast<nunique_aggregation const*>(agg.get());
+        auto nunique_agg = static_cast<nunique_aggregation const&>(agg);
         return make_fixed_width_scalar(
-          detail::distinct_count(
-            col, nunique_agg->_null_handling, nan_policy::NAN_IS_VALID, stream),
+          detail::distinct_count(col, nunique_agg._null_handling, nan_policy::NAN_IS_VALID, stream),
           stream,
           mr);
       }
       case aggregation::NTH_ELEMENT: {
-        auto nth_agg = dynamic_cast<nth_element_aggregation const*>(agg.get());
-        return reduction::nth_element(col, nth_agg->_n, nth_agg->_null_handling, stream, mr);
+        auto nth_agg = static_cast<nth_element_aggregation const&>(agg);
+        return reduction::nth_element(col, nth_agg._n, nth_agg._null_handling, stream, mr);
       }
       case aggregation::COLLECT_LIST: {
-        auto col_agg = dynamic_cast<collect_list_aggregation const*>(agg.get());
-        return reduction::collect_list(col, col_agg->_null_handling, stream, mr);
+        auto col_agg = static_cast<collect_list_aggregation const&>(agg);
+        return reduction::collect_list(col, col_agg._null_handling, stream, mr);
       }
       case aggregation::COLLECT_SET: {
-        auto col_agg = dynamic_cast<collect_set_aggregation const*>(agg.get());
+        auto col_agg = static_cast<collect_set_aggregation const&>(agg);
         return reduction::collect_set(
-          col, col_agg->_null_handling, col_agg->_nulls_equal, col_agg->_nans_equal, stream, mr);
+          col, col_agg._null_handling, col_agg._nulls_equal, col_agg._nans_equal, stream, mr);
       }
       case aggregation::MERGE_LISTS: {
         return reduction::merge_lists(col, stream, mr);
       }
       case aggregation::MERGE_SETS: {
-        auto col_agg = dynamic_cast<merge_sets_aggregation const*>(agg.get());
-        return reduction::merge_sets(col, col_agg->_nulls_equal, col_agg->_nans_equal, stream, mr);
+        auto col_agg = static_cast<merge_sets_aggregation const&>(agg);
+        return reduction::merge_sets(col, col_agg._nulls_equal, col_agg._nans_equal, stream, mr);
       }
       case aggregation::TDIGEST: {
         CUDF_EXPECTS(output_dtype.id() == type_id::STRUCT,
                      "Tdigest aggregations expect output type to be STRUCT");
-        auto td_agg = dynamic_cast<tdigest_aggregation const*>(agg.get());
-        return detail::tdigest::reduce_tdigest(col, td_agg->max_centroids, stream, mr);
+        auto td_agg = static_cast<tdigest_aggregation const&>(agg);
+        return detail::tdigest::reduce_tdigest(col, td_agg.max_centroids, stream, mr);
       }
       case aggregation::MERGE_TDIGEST: {
         CUDF_EXPECTS(output_dtype.id() == type_id::STRUCT,
                      "Tdigest aggregations expect output type to be STRUCT");
-        auto td_agg = dynamic_cast<merge_tdigest_aggregation const*>(agg.get());
-        return detail::tdigest::reduce_merge_tdigest(col, td_agg->max_centroids, stream, mr);
+        auto td_agg = static_cast<merge_tdigest_aggregation const&>(agg);
+        return detail::tdigest::reduce_merge_tdigest(col, td_agg.max_centroids, stream, mr);
       }
       default: CUDF_FAIL("Unsupported reduction operator");
     }
@@ -140,7 +139,7 @@ struct reduce_dispatch_functor {
 
 std::unique_ptr<scalar> reduce(
   column_view const& col,
-  std::unique_ptr<reduce_aggregation> const& agg,
+  reduce_aggregation const& agg,
   data_type output_dtype,
   std::optional<std::reference_wrapper<scalar const>> init,
   rmm::cuda_stream_view stream        = cudf::get_default_stream(),
@@ -148,16 +147,16 @@ std::unique_ptr<scalar> reduce(
 {
   CUDF_EXPECTS(!init.has_value() || col.type() == init.value().get().type(),
                "column and initial value must be the same type");
-  if (init.has_value() && !(agg->kind == aggregation::SUM || agg->kind == aggregation::PRODUCT ||
-                            agg->kind == aggregation::MIN || agg->kind == aggregation::MAX ||
-                            agg->kind == aggregation::ANY || agg->kind == aggregation::ALL)) {
+  if (init.has_value() && !(agg.kind == aggregation::SUM || agg.kind == aggregation::PRODUCT ||
+                            agg.kind == aggregation::MIN || agg.kind == aggregation::MAX ||
+                            agg.kind == aggregation::ANY || agg.kind == aggregation::ALL)) {
     CUDF_FAIL(
       "Initial value is only supported for SUM, PRODUCT, MIN, MAX, ANY, and ALL aggregation types");
   }
   // Returns default scalar if input column is non-valid. In terms of nested columns, we need to
   // handcraft the default scalar with input column.
   if (col.size() <= col.null_count()) {
-    if (agg->kind == aggregation::TDIGEST || agg->kind == aggregation::MERGE_TDIGEST) {
+    if (agg.kind == aggregation::TDIGEST || agg.kind == aggregation::MERGE_TDIGEST) {
       return detail::tdigest::make_empty_tdigest_scalar();
     }
     if (col.type().id() == type_id::EMPTY || col.type() != output_dtype) {
@@ -176,12 +175,12 @@ std::unique_ptr<scalar> reduce(
   }
 
   return aggregation_dispatcher(
-    agg->kind, reduce_dispatch_functor{col, output_dtype, init, stream, mr}, agg);
+    agg.kind, reduce_dispatch_functor{col, output_dtype, init, stream, mr}, agg);
 }
 }  // namespace detail
 
 std::unique_ptr<scalar> reduce(column_view const& col,
-                               std::unique_ptr<reduce_aggregation> const& agg,
+                               reduce_aggregation const& agg,
                                data_type output_dtype,
                                rmm::mr::device_memory_resource* mr)
 {
@@ -190,7 +189,7 @@ std::unique_ptr<scalar> reduce(column_view const& col,
 }
 
 std::unique_ptr<scalar> reduce(column_view const& col,
-                               std::unique_ptr<reduce_aggregation> const& agg,
+                               reduce_aggregation const& agg,
                                data_type output_dtype,
                                std::optional<std::reference_wrapper<scalar const>> init,
                                rmm::mr::device_memory_resource* mr)
diff --git a/cpp/src/reductions/scan/scan.cpp b/cpp/src/reductions/scan/scan.cpp
index c0b787b3a1d..2871ee283ba 100644
--- a/cpp/src/reductions/scan/scan.cpp
+++ b/cpp/src/reductions/scan/scan.cpp
@@ -25,16 +25,16 @@ namespace cudf {
 
 namespace detail {
 std::unique_ptr<column> scan(column_view const& input,
-                             std::unique_ptr<scan_aggregation> const& agg,
+                             scan_aggregation const& agg,
                              scan_type inclusive,
                              null_policy null_handling,
                              rmm::cuda_stream_view stream,
                              rmm::mr::device_memory_resource* mr)
 {
-  if (agg->kind == aggregation::RANK) {
+  if (agg.kind == aggregation::RANK) {
     CUDF_EXPECTS(inclusive == scan_type::INCLUSIVE,
                  "Rank aggregation operator requires an inclusive scan");
-    auto const& rank_agg = dynamic_cast<cudf::detail::rank_aggregation const&>(*agg);
+    auto const& rank_agg = static_cast<cudf::detail::rank_aggregation const&>(agg);
     if (rank_agg._method == rank_method::MIN) {
       if (rank_agg._percentage == rank_percentage::NONE) {
         return inclusive_rank_scan(input, stream, mr);
@@ -55,7 +55,7 @@ std::unique_ptr<column> scan(column_view const& input,
 }  // namespace detail
 
 std::unique_ptr<column> scan(column_view const& input,
-                             std::unique_ptr<scan_aggregation> const& agg,
+                             scan_aggregation const& agg,
                              scan_type inclusive,
                              null_policy null_handling,
                              rmm::mr::device_memory_resource* mr)
diff --git a/cpp/src/reductions/scan/scan.cuh b/cpp/src/reductions/scan/scan.cuh
index 127f2ae95b4..2ad6124cdd0 100644
--- a/cpp/src/reductions/scan/scan.cuh
+++ b/cpp/src/reductions/scan/scan.cuh
@@ -35,12 +35,12 @@ rmm::device_buffer mask_scan(column_view const& input_view,
 
 template <template <typename> typename DispatchFn>
 std::unique_ptr<column> scan_agg_dispatch(const column_view& input,
-                                          std::unique_ptr<scan_aggregation> const& agg,
+                                          scan_aggregation const& agg,
                                           null_policy null_handling,
                                           rmm::cuda_stream_view stream,
                                           rmm::mr::device_memory_resource* mr)
 {
-  switch (agg->kind) {
+  switch (agg.kind) {
     case aggregation::SUM:
       return type_dispatcher<dispatch_storage_type>(
         input.type(), DispatchFn<DeviceSum>(), input, null_handling, stream, mr);
diff --git a/cpp/src/reductions/scan/scan_exclusive.cu b/cpp/src/reductions/scan/scan_exclusive.cu
index 885d7e904b4..0c2973d63b1 100644
--- a/cpp/src/reductions/scan/scan_exclusive.cu
+++ b/cpp/src/reductions/scan/scan_exclusive.cu
@@ -81,7 +81,7 @@ struct scan_dispatcher {
 }  // namespace
 
 std::unique_ptr<column> scan_exclusive(const column_view& input,
-                                       std::unique_ptr<scan_aggregation> const& agg,
+                                       scan_aggregation const& agg,
                                        null_policy null_handling,
                                        rmm::cuda_stream_view stream,
                                        rmm::mr::device_memory_resource* mr)
diff --git a/cpp/src/reductions/scan/scan_inclusive.cu b/cpp/src/reductions/scan/scan_inclusive.cu
index df5b5008e5b..99c67a563e4 100644
--- a/cpp/src/reductions/scan/scan_inclusive.cu
+++ b/cpp/src/reductions/scan/scan_inclusive.cu
@@ -248,7 +248,7 @@ struct scan_dispatcher {
 
 std::unique_ptr<column> scan_inclusive(
   column_view const& input,
-  std::unique_ptr<scan_aggregation> const& agg,
+  scan_aggregation const& agg,
   null_policy null_handling,
   rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
diff --git a/cpp/tests/quantiles/percentile_approx_test.cu b/cpp/tests/quantiles/percentile_approx_test.cu
index 82151caea53..a26f6a99ba6 100644
--- a/cpp/tests/quantiles/percentile_approx_test.cu
+++ b/cpp/tests/quantiles/percentile_approx_test.cu
@@ -184,7 +184,7 @@ void percentile_approx_test(column_view const& _keys,
       // result is a scalar, but we want to extract out the underlying column
       auto scalar_result =
         cudf::reduce(values,
-                     cudf::make_tdigest_aggregation<cudf::reduce_aggregation>(delta),
+                     *cudf::make_tdigest_aggregation<cudf::reduce_aggregation>(delta),
                      data_type{type_id::STRUCT});
       auto tbl = static_cast<cudf::struct_scalar const*>(scalar_result.get())->view();
       std::vector<std::unique_ptr<cudf::column>> cols;
diff --git a/cpp/tests/reductions/collect_ops_tests.cpp b/cpp/tests/reductions/collect_ops_tests.cpp
index 842aaa3ab07..2bb13fd671f 100644
--- a/cpp/tests/reductions/collect_ops_tests.cpp
+++ b/cpp/tests/reductions/collect_ops_tests.cpp
@@ -31,7 +31,7 @@ namespace {
 
 auto collect_set(cudf::column_view const& input, std::unique_ptr<reduce_aggregation> const& agg)
 {
-  auto const result_scalar = cudf::reduce(input, agg, data_type{type_id::LIST});
+  auto const result_scalar = cudf::reduce(input, *agg, data_type{type_id::LIST});
 
   // The results of `collect_set` are unordered thus we need to sort them for comparison.
   auto const result_sorted_table =
@@ -63,20 +63,20 @@ TYPED_TEST(CollectTestFixedWidth, CollectList)
   // null_include without nulls
   fw_wrapper col(values.begin(), values.end());
   auto const ret = cudf::reduce(
-    col, make_collect_list_aggregation<reduce_aggregation>(), data_type{type_id::LIST});
+    col, *make_collect_list_aggregation<reduce_aggregation>(), data_type{type_id::LIST});
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(col, dynamic_cast<list_scalar*>(ret.get())->view());
 
   // null_include with nulls
   fw_wrapper col_with_null(values.begin(), values.end(), null_mask.begin());
   auto const ret1 = cudf::reduce(
-    col_with_null, make_collect_list_aggregation<reduce_aggregation>(), data_type{type_id::LIST});
+    col_with_null, *make_collect_list_aggregation<reduce_aggregation>(), data_type{type_id::LIST});
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(col_with_null, dynamic_cast<list_scalar*>(ret1.get())->view());
 
   // null_exclude with nulls
   fw_wrapper col_null_filtered{{5, 0, -111, 0, 64, 99, -16}};
   auto const ret2 =
     cudf::reduce(col_with_null,
-                 make_collect_list_aggregation<reduce_aggregation>(null_policy::EXCLUDE),
+                 *make_collect_list_aggregation<reduce_aggregation>(null_policy::EXCLUDE),
                  data_type{type_id::LIST});
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(col_null_filtered, dynamic_cast<list_scalar*>(ret2.get())->view());
 }
@@ -128,7 +128,7 @@ TYPED_TEST(CollectTestFixedWidth, MergeLists)
   auto const lists1    = lists_col{{1, 2, 3}, {}, {}, {4}, {5, 6, 7}, {8, 9}, {}};
   auto const expected1 = fw_wrapper{{1, 2, 3, 4, 5, 6, 7, 8, 9}};
   auto const ret1      = cudf::reduce(
-    lists1, make_merge_lists_aggregation<reduce_aggregation>(), data_type{type_id::LIST});
+    lists1, *make_merge_lists_aggregation<reduce_aggregation>(), data_type{type_id::LIST});
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, dynamic_cast<list_scalar*>(ret1.get())->view());
 
   // test with nulls
@@ -145,7 +145,7 @@ TYPED_TEST(CollectTestFixedWidth, MergeLists)
   auto const expected2 = fw_wrapper{{1, 2, 3, 0, 4, 0, 5, 0, 0, 0, 6, 7, 8, 9},
                                     {1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1}};
   auto const ret2      = cudf::reduce(
-    lists2, make_merge_lists_aggregation<reduce_aggregation>(), data_type{type_id::LIST});
+    lists2, *make_merge_lists_aggregation<reduce_aggregation>(), data_type{type_id::LIST});
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, dynamic_cast<list_scalar*>(ret2.get())->view());
 }
 
@@ -278,14 +278,14 @@ TEST_F(CollectTest, CollectStrings)
 
   // collect_list including nulls
   auto const ret1 = cudf::reduce(
-    s_col, make_collect_list_aggregation<reduce_aggregation>(), data_type{type_id::LIST});
+    s_col, *make_collect_list_aggregation<reduce_aggregation>(), data_type{type_id::LIST});
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(s_col, dynamic_cast<list_scalar*>(ret1.get())->view());
 
   // collect_list excluding nulls
   auto const expected2 = str_col{"a", "a", "b", "b", "c", "d", "e", "e"};
   auto const ret2 =
     cudf::reduce(s_col,
-                 make_collect_list_aggregation<reduce_aggregation>(null_policy::EXCLUDE),
+                 *make_collect_list_aggregation<reduce_aggregation>(null_policy::EXCLUDE),
                  data_type{type_id::LIST});
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, dynamic_cast<list_scalar*>(ret2.get())->view());
 
@@ -313,7 +313,7 @@ TEST_F(CollectTest, CollectStrings)
   auto const expected5 = str_col{{"a", "a", "b", "b", "null", "c", "null", "d", "null", "e"},
                                  {1, 1, 1, 1, 0, 1, 0, 1, 0, 1}};
   auto const ret5      = cudf::reduce(
-    strings, make_merge_lists_aggregation<reduce_aggregation>(), data_type{type_id::LIST});
+    strings, *make_merge_lists_aggregation<reduce_aggregation>(), data_type{type_id::LIST});
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected5, dynamic_cast<list_scalar*>(ret5.get())->view());
 
   // merge_sets with null_equal
@@ -336,7 +336,7 @@ TEST_F(CollectTest, CollectEmptys)
   // test collect empty columns
   auto empty = int_col{};
   auto ret   = cudf::reduce(
-    empty, make_collect_list_aggregation<reduce_aggregation>(), data_type{type_id::LIST});
+    empty, *make_collect_list_aggregation<reduce_aggregation>(), data_type{type_id::LIST});
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(int_col{}, dynamic_cast<list_scalar*>(ret.get())->view());
 
   ret = collect_set(empty, make_collect_set_aggregation<reduce_aggregation>());
@@ -345,7 +345,7 @@ TEST_F(CollectTest, CollectEmptys)
   // test collect all null columns
   auto all_nulls = int_col{{1, 2, 3, 4, 5}, {0, 0, 0, 0, 0}};
   ret            = cudf::reduce(
-    all_nulls, make_collect_list_aggregation<reduce_aggregation>(), data_type{type_id::LIST});
+    all_nulls, *make_collect_list_aggregation<reduce_aggregation>(), data_type{type_id::LIST});
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(int_col{}, dynamic_cast<list_scalar*>(ret.get())->view());
 
   ret = collect_set(all_nulls, make_collect_set_aggregation<reduce_aggregation>());
diff --git a/cpp/tests/reductions/list_rank_test.cpp b/cpp/tests/reductions/list_rank_test.cpp
index 1409ccd1311..5f3ab1636ef 100644
--- a/cpp/tests/reductions/list_rank_test.cpp
+++ b/cpp/tests/reductions/list_rank_test.cpp
@@ -27,7 +27,7 @@
 struct ListRankScanTest : public cudf::test::BaseFixture {
   inline void test_ungrouped_rank_scan(cudf::column_view const& input,
                                        cudf::column_view const& expect_vals,
-                                       std::unique_ptr<cudf::scan_aggregation> const& agg,
+                                       cudf::scan_aggregation const& agg,
                                        cudf::null_policy null_handling)
   {
     auto col_out = cudf::scan(input, agg, cudf::scan_type::INCLUSIVE, null_handling);
@@ -46,7 +46,7 @@ TEST_F(ListRankScanTest, BasicList)
   this->test_ungrouped_rank_scan(
     col,
     expected_dense_vals,
-    cudf::make_rank_aggregation<cudf::scan_aggregation>(cudf::rank_method::DENSE),
+    *cudf::make_rank_aggregation<cudf::scan_aggregation>(cudf::rank_method::DENSE),
     cudf::null_policy::INCLUDE);
 }
 
@@ -78,7 +78,7 @@ TEST_F(ListRankScanTest, DeepList)
     this->test_ungrouped_rank_scan(
       col,
       expected_dense_vals,
-      cudf::make_rank_aggregation<cudf::scan_aggregation>(cudf::rank_method::DENSE),
+      *cudf::make_rank_aggregation<cudf::scan_aggregation>(cudf::rank_method::DENSE),
       cudf::null_policy::INCLUDE);
   }
 
@@ -89,7 +89,7 @@ TEST_F(ListRankScanTest, DeepList)
     this->test_ungrouped_rank_scan(
       sliced_col,
       expected_dense_vals,
-      cudf::make_rank_aggregation<cudf::scan_aggregation>(cudf::rank_method::DENSE),
+      *cudf::make_rank_aggregation<cudf::scan_aggregation>(cudf::rank_method::DENSE),
       cudf::null_policy::INCLUDE);
   }
 }
@@ -145,7 +145,7 @@ TEST_F(ListRankScanTest, ListOfStruct)
     this->test_ungrouped_rank_scan(
       list_column,
       expect,
-      cudf::make_rank_aggregation<cudf::scan_aggregation>(cudf::rank_method::DENSE),
+      *cudf::make_rank_aggregation<cudf::scan_aggregation>(cudf::rank_method::DENSE),
       cudf::null_policy::INCLUDE);
   }
 
@@ -157,7 +157,7 @@ TEST_F(ListRankScanTest, ListOfStruct)
     this->test_ungrouped_rank_scan(
       sliced_col,
       expect,
-      cudf::make_rank_aggregation<cudf::scan_aggregation>(cudf::rank_method::DENSE),
+      *cudf::make_rank_aggregation<cudf::scan_aggregation>(cudf::rank_method::DENSE),
       cudf::null_policy::INCLUDE);
   }
 }
@@ -201,7 +201,7 @@ TEST_F(ListRankScanTest, ListOfEmptyStruct)
   this->test_ungrouped_rank_scan(
     *list_column,
     expect,
-    cudf::make_rank_aggregation<cudf::scan_aggregation>(cudf::rank_method::DENSE),
+    *cudf::make_rank_aggregation<cudf::scan_aggregation>(cudf::rank_method::DENSE),
     cudf::null_policy::INCLUDE);
 }
 
@@ -231,6 +231,6 @@ TEST_F(ListRankScanTest, EmptyDeepList)
   this->test_ungrouped_rank_scan(
     *list_column,
     expect,
-    cudf::make_rank_aggregation<cudf::scan_aggregation>(cudf::rank_method::DENSE),
+    *cudf::make_rank_aggregation<cudf::scan_aggregation>(cudf::rank_method::DENSE),
     cudf::null_policy::INCLUDE);
 }
diff --git a/cpp/tests/reductions/rank_tests.cpp b/cpp/tests/reductions/rank_tests.cpp
index 5e90e5cfed8..73b721d5d85 100644
--- a/cpp/tests/reductions/rank_tests.cpp
+++ b/cpp/tests/reductions/rank_tests.cpp
@@ -52,7 +52,7 @@ template <typename T>
 struct TypedRankScanTest : BaseScanTest<T> {
   inline void test_ungrouped_rank_scan(cudf::column_view const& input,
                                        cudf::column_view const& expect_vals,
-                                       std::unique_ptr<scan_aggregation> const& agg)
+                                       scan_aggregation const& agg)
   {
     auto col_out = cudf::scan(input, agg, INCLUSIVE_SCAN, INCLUDE_NULLS);
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expect_vals, col_out->view());
@@ -90,9 +90,9 @@ TYPED_TEST(TypedRankScanTest, Rank)
                                                    6.0 / 11,
                                                    10.0 / 11,
                                                    11.0 / 11};
-  this->test_ungrouped_rank_scan(*col, expected_dense, dense_rank);
-  this->test_ungrouped_rank_scan(*col, expected_rank, rank);
-  this->test_ungrouped_rank_scan(*col, expected_percent, percent_rank);
+  this->test_ungrouped_rank_scan(*col, expected_dense, *dense_rank);
+  this->test_ungrouped_rank_scan(*col, expected_rank, *rank);
+  this->test_ungrouped_rank_scan(*col, expected_percent, *percent_rank);
 }
 
 TYPED_TEST(TypedRankScanTest, RankWithNulls)
@@ -120,9 +120,9 @@ TYPED_TEST(TypedRankScanTest, RankWithNulls)
                                                    8.0 / 11,
                                                    10.0 / 11,
                                                    11.0 / 11};
-  this->test_ungrouped_rank_scan(*col, expected_dense, dense_rank);
-  this->test_ungrouped_rank_scan(*col, expected_rank, rank);
-  this->test_ungrouped_rank_scan(*col, expected_percent, percent_rank);
+  this->test_ungrouped_rank_scan(*col, expected_dense, *dense_rank);
+  this->test_ungrouped_rank_scan(*col, expected_rank, *rank);
+  this->test_ungrouped_rank_scan(*col, expected_percent, *percent_rank);
 }
 
 namespace {
@@ -172,9 +172,9 @@ TYPED_TEST(TypedRankScanTest, MixedStructs)
                                                    9.0 / 11,
                                                    11.0 / 11};
 
-  this->test_ungrouped_rank_scan(struct_col, expected_dense, dense_rank);
-  this->test_ungrouped_rank_scan(struct_col, expected_rank, rank);
-  this->test_ungrouped_rank_scan(struct_col, expected_percent, percent_rank);
+  this->test_ungrouped_rank_scan(struct_col, expected_dense, *dense_rank);
+  this->test_ungrouped_rank_scan(struct_col, expected_rank, *rank);
+  this->test_ungrouped_rank_scan(struct_col, expected_percent, *percent_rank);
 }
 
 TYPED_TEST(TypedRankScanTest, NestedStructs)
@@ -196,16 +196,16 @@ TYPED_TEST(TypedRankScanTest, NestedStructs)
     return structs_column_wrapper{{col, strings_col, nuther_col}};
   }();
 
-  auto const dense_out      = cudf::scan(nested_col, dense_rank, INCLUSIVE_SCAN, INCLUDE_NULLS);
-  auto const dense_expected = cudf::scan(flat_col, dense_rank, INCLUSIVE_SCAN, INCLUDE_NULLS);
+  auto const dense_out      = cudf::scan(nested_col, *dense_rank, INCLUSIVE_SCAN, INCLUDE_NULLS);
+  auto const dense_expected = cudf::scan(flat_col, *dense_rank, INCLUSIVE_SCAN, INCLUDE_NULLS);
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(dense_out->view(), dense_expected->view());
 
-  auto const rank_out      = cudf::scan(nested_col, rank, INCLUSIVE_SCAN, INCLUDE_NULLS);
-  auto const rank_expected = cudf::scan(flat_col, rank, INCLUSIVE_SCAN, INCLUDE_NULLS);
+  auto const rank_out      = cudf::scan(nested_col, *rank, INCLUSIVE_SCAN, INCLUDE_NULLS);
+  auto const rank_expected = cudf::scan(flat_col, *rank, INCLUSIVE_SCAN, INCLUDE_NULLS);
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(rank_out->view(), rank_expected->view());
 
-  auto const percent_out      = cudf::scan(nested_col, percent_rank, INCLUSIVE_SCAN, INCLUDE_NULLS);
-  auto const percent_expected = cudf::scan(flat_col, percent_rank, INCLUSIVE_SCAN, INCLUDE_NULLS);
+  auto const percent_out = cudf::scan(nested_col, *percent_rank, INCLUSIVE_SCAN, INCLUDE_NULLS);
+  auto const percent_expected = cudf::scan(flat_col, *percent_rank, INCLUSIVE_SCAN, INCLUDE_NULLS);
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(percent_out->view(), percent_expected->view());
 }
 
@@ -220,9 +220,9 @@ TYPED_TEST(TypedRankScanTest, StructsWithNullPushdown)
     auto const expected_null_result = rank_result_col{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
     auto const expected_percent_rank_null_result =
       percent_result_col{0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
-    auto const dense_out   = cudf::scan(*struct_col, dense_rank, INCLUSIVE_SCAN, INCLUDE_NULLS);
-    auto const rank_out    = cudf::scan(*struct_col, rank, INCLUSIVE_SCAN, INCLUDE_NULLS);
-    auto const percent_out = cudf::scan(*struct_col, percent_rank, INCLUSIVE_SCAN, INCLUDE_NULLS);
+    auto const dense_out   = cudf::scan(*struct_col, *dense_rank, INCLUSIVE_SCAN, INCLUDE_NULLS);
+    auto const rank_out    = cudf::scan(*struct_col, *rank, INCLUSIVE_SCAN, INCLUDE_NULLS);
+    auto const percent_out = cudf::scan(*struct_col, *percent_rank, INCLUSIVE_SCAN, INCLUDE_NULLS);
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(dense_out->view(), expected_null_result);
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(rank_out->view(), expected_null_result);
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(percent_out->view(), expected_percent_rank_null_result);
@@ -248,9 +248,9 @@ TYPED_TEST(TypedRankScanTest, StructsWithNullPushdown)
                                                      9.0 / 11,
                                                      9.0 / 11,
                                                      11.0 / 11};
-    auto const dense_out   = cudf::scan(*struct_col, dense_rank, INCLUSIVE_SCAN, INCLUDE_NULLS);
-    auto const rank_out    = cudf::scan(*struct_col, rank, INCLUSIVE_SCAN, INCLUDE_NULLS);
-    auto const percent_out = cudf::scan(*struct_col, percent_rank, INCLUSIVE_SCAN, INCLUDE_NULLS);
+    auto const dense_out   = cudf::scan(*struct_col, *dense_rank, INCLUSIVE_SCAN, INCLUDE_NULLS);
+    auto const rank_out    = cudf::scan(*struct_col, *rank, INCLUSIVE_SCAN, INCLUDE_NULLS);
+    auto const percent_out = cudf::scan(*struct_col, *percent_rank, INCLUSIVE_SCAN, INCLUDE_NULLS);
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(dense_out->view(), expected_dense);
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(rank_out->view(), expected_rank);
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(percent_out->view(), expected_percent);
@@ -278,9 +278,9 @@ TEST(RankScanTest, BoolRank)
                                                    3.0 / 11,
                                                    3.0 / 11};
 
-  auto const dense_out   = cudf::scan(vals, dense_rank, INCLUSIVE_SCAN, INCLUDE_NULLS);
-  auto const rank_out    = cudf::scan(vals, rank, INCLUSIVE_SCAN, INCLUDE_NULLS);
-  auto const percent_out = cudf::scan(vals, percent_rank, INCLUSIVE_SCAN, INCLUDE_NULLS);
+  auto const dense_out   = cudf::scan(vals, *dense_rank, INCLUSIVE_SCAN, INCLUDE_NULLS);
+  auto const rank_out    = cudf::scan(vals, *rank, INCLUSIVE_SCAN, INCLUDE_NULLS);
+  auto const percent_out = cudf::scan(vals, *percent_rank, INCLUSIVE_SCAN, INCLUDE_NULLS);
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_dense, dense_out->view());
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_rank, rank_out->view());
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_percent, percent_out->view());
@@ -304,9 +304,9 @@ TEST(RankScanTest, BoolRankWithNull)
                                                    8.0 / 11,
                                                    8.0 / 11};
 
-  auto nullable_dense_out   = cudf::scan(vals, dense_rank, INCLUSIVE_SCAN, INCLUDE_NULLS);
-  auto nullable_rank_out    = cudf::scan(vals, rank, INCLUSIVE_SCAN, INCLUDE_NULLS);
-  auto nullable_percent_out = cudf::scan(vals, percent_rank, INCLUSIVE_SCAN, INCLUDE_NULLS);
+  auto nullable_dense_out   = cudf::scan(vals, *dense_rank, INCLUSIVE_SCAN, INCLUDE_NULLS);
+  auto nullable_rank_out    = cudf::scan(vals, *rank, INCLUSIVE_SCAN, INCLUDE_NULLS);
+  auto nullable_percent_out = cudf::scan(vals, *percent_rank, INCLUSIVE_SCAN, INCLUDE_NULLS);
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_dense, nullable_dense_out->view());
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_rank, nullable_rank_out->view());
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_percent, nullable_percent_out->view());
@@ -316,11 +316,11 @@ TEST(RankScanTest, ExclusiveScan)
 {
   auto const vals = input<uint32_t>{3, 4, 5};
 
-  CUDF_EXPECT_THROW_MESSAGE(cudf::scan(vals, dense_rank, scan_type::EXCLUSIVE, INCLUDE_NULLS),
+  CUDF_EXPECT_THROW_MESSAGE(cudf::scan(vals, *dense_rank, scan_type::EXCLUSIVE, INCLUDE_NULLS),
                             "Rank aggregation operator requires an inclusive scan");
-  CUDF_EXPECT_THROW_MESSAGE(cudf::scan(vals, rank, scan_type::EXCLUSIVE, INCLUDE_NULLS),
+  CUDF_EXPECT_THROW_MESSAGE(cudf::scan(vals, *rank, scan_type::EXCLUSIVE, INCLUDE_NULLS),
                             "Rank aggregation operator requires an inclusive scan");
-  CUDF_EXPECT_THROW_MESSAGE(cudf::scan(vals, percent_rank, scan_type::EXCLUSIVE, INCLUDE_NULLS),
+  CUDF_EXPECT_THROW_MESSAGE(cudf::scan(vals, *percent_rank, scan_type::EXCLUSIVE, INCLUDE_NULLS),
                             "Rank aggregation operator requires an inclusive scan");
 }
 
diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp
index 7f04d3edb14..4bfecdfeb01 100644
--- a/cpp/tests/reductions/reduction_tests.cpp
+++ b/cpp/tests/reductions/reduction_tests.cpp
@@ -94,7 +94,7 @@ struct ReductionTest : public cudf::test::BaseFixture {
 
   template <typename T_out>
   std::pair<T_out, bool> reduction_test(cudf::column_view const& underlying_column,
-                                        std::unique_ptr<reduce_aggregation> const& agg,
+                                        reduce_aggregation const& agg,
                                         std::optional<cudf::data_type> _output_dtype = {})
   {
     auto const output_dtype                 = _output_dtype.value_or(underlying_column.type());
@@ -108,7 +108,7 @@ struct ReductionTest : public cudf::test::BaseFixture {
   template <typename T_out>
   std::pair<T_out, bool> reduction_test(cudf::column_view const& underlying_column,
                                         cudf::scalar const& initial_value,
-                                        std::unique_ptr<reduce_aggregation> const& agg,
+                                        reduce_aggregation const& agg,
                                         std::optional<cudf::data_type> _output_dtype = {})
   {
     auto const output_dtype = _output_dtype.value_or(underlying_column.type());
@@ -152,19 +152,19 @@ TYPED_TEST(MinMaxReductionTest, MinMaxTypes)
     v.begin(), v.end(), init_value, [](const T& a, const T& b) { return std::max<T>(a, b); });
 
   EXPECT_EQ(
-    this->template reduction_test<T>(col, cudf::make_min_aggregation<reduce_aggregation>()).first,
+    this->template reduction_test<T>(col, *cudf::make_min_aggregation<reduce_aggregation>()).first,
     expected_min_result);
   EXPECT_EQ(
-    this->template reduction_test<T>(col, cudf::make_max_aggregation<reduce_aggregation>()).first,
+    this->template reduction_test<T>(col, *cudf::make_max_aggregation<reduce_aggregation>()).first,
     expected_max_result);
   EXPECT_EQ(this
               ->template reduction_test<T>(
-                col, *init_scalar, cudf::make_min_aggregation<reduce_aggregation>())
+                col, *init_scalar, *cudf::make_min_aggregation<reduce_aggregation>())
               .first,
             expected_min_init_result);
   EXPECT_EQ(this
               ->template reduction_test<T>(
-                col, *init_scalar, cudf::make_max_aggregation<reduce_aggregation>())
+                col, *init_scalar, *cudf::make_max_aggregation<reduce_aggregation>())
               .first,
             expected_max_init_result);
 
@@ -194,21 +194,21 @@ TYPED_TEST(MinMaxReductionTest, MinMaxTypes)
     });
 
   EXPECT_EQ(
-    this->template reduction_test<T>(col_nulls, cudf::make_min_aggregation<reduce_aggregation>())
+    this->template reduction_test<T>(col_nulls, *cudf::make_min_aggregation<reduce_aggregation>())
       .first,
     expected_min_null_result);
   EXPECT_EQ(
-    this->template reduction_test<T>(col_nulls, cudf::make_max_aggregation<reduce_aggregation>())
+    this->template reduction_test<T>(col_nulls, *cudf::make_max_aggregation<reduce_aggregation>())
       .first,
     expected_max_null_result);
   EXPECT_EQ(this
               ->template reduction_test<T>(
-                col_nulls, *init_scalar, cudf::make_min_aggregation<reduce_aggregation>())
+                col_nulls, *init_scalar, *cudf::make_min_aggregation<reduce_aggregation>())
               .first,
             expected_min_init_null_result);
   EXPECT_EQ(this
               ->template reduction_test<T>(
-                col_nulls, *init_scalar, cudf::make_max_aggregation<reduce_aggregation>())
+                col_nulls, *init_scalar, *cudf::make_max_aggregation<reduce_aggregation>())
               .first,
             expected_max_init_null_result);
 
@@ -226,19 +226,19 @@ TYPED_TEST(MinMaxReductionTest, MinMaxTypes)
 
   EXPECT_FALSE(
     this
-      ->template reduction_test<T>(col_all_nulls, cudf::make_min_aggregation<reduce_aggregation>())
+      ->template reduction_test<T>(col_all_nulls, *cudf::make_min_aggregation<reduce_aggregation>())
       .second);
   EXPECT_FALSE(
     this
-      ->template reduction_test<T>(col_all_nulls, cudf::make_max_aggregation<reduce_aggregation>())
+      ->template reduction_test<T>(col_all_nulls, *cudf::make_max_aggregation<reduce_aggregation>())
       .second);
   EXPECT_FALSE(this
                  ->template reduction_test<T>(
-                   col_all_nulls, *init_scalar, cudf::make_min_aggregation<reduce_aggregation>())
+                   col_all_nulls, *init_scalar, *cudf::make_min_aggregation<reduce_aggregation>())
                  .second);
   EXPECT_FALSE(this
                  ->template reduction_test<T>(
-                   col_all_nulls, *init_scalar, cudf::make_max_aggregation<reduce_aggregation>())
+                   col_all_nulls, *init_scalar, *cudf::make_max_aggregation<reduce_aggregation>())
                  .second);
 
   auto all_null_res = cudf::minmax(col_all_nulls);
@@ -271,11 +271,11 @@ TYPED_TEST(SumReductionTest, Sum)
   T expected_value_init = std::accumulate(v.begin(), v.end(), init_value);
 
   EXPECT_EQ(
-    this->template reduction_test<T>(col, cudf::make_sum_aggregation<reduce_aggregation>()).first,
+    this->template reduction_test<T>(col, *cudf::make_sum_aggregation<reduce_aggregation>()).first,
     expected_value);
   EXPECT_EQ(this
               ->template reduction_test<T>(
-                col, *init_scalar, cudf::make_sum_aggregation<reduce_aggregation>())
+                col, *init_scalar, *cudf::make_sum_aggregation<reduce_aggregation>())
               .first,
             expected_value_init);
 
@@ -286,12 +286,12 @@ TYPED_TEST(SumReductionTest, Sum)
   init_scalar->set_valid_async(false);
 
   EXPECT_EQ(
-    this->template reduction_test<T>(col_nulls, cudf::make_sum_aggregation<reduce_aggregation>())
+    this->template reduction_test<T>(col_nulls, *cudf::make_sum_aggregation<reduce_aggregation>())
       .first,
     expected_null_value);
   EXPECT_FALSE(this
                  ->template reduction_test<T>(
-                   col_nulls, *init_scalar, cudf::make_sum_aggregation<reduce_aggregation>())
+                   col_nulls, *init_scalar, *cudf::make_sum_aggregation<reduce_aggregation>())
                  .second);
 }
 
@@ -323,12 +323,12 @@ TYPED_TEST(ReductionTest, Product)
   TypeParam expected_value_init = calc_prod_init(v, init_value);
 
   EXPECT_EQ(
-    this->template reduction_test<T>(col, cudf::make_product_aggregation<reduce_aggregation>())
+    this->template reduction_test<T>(col, *cudf::make_product_aggregation<reduce_aggregation>())
       .first,
     expected_value);
   EXPECT_EQ(this
               ->template reduction_test<T>(
-                col, *init_scalar, cudf::make_product_aggregation<reduce_aggregation>())
+                col, *init_scalar, *cudf::make_product_aggregation<reduce_aggregation>())
               .first,
             expected_value_init);
 
@@ -340,12 +340,12 @@ TYPED_TEST(ReductionTest, Product)
 
   EXPECT_EQ(
     this
-      ->template reduction_test<T>(col_nulls, cudf::make_product_aggregation<reduce_aggregation>())
+      ->template reduction_test<T>(col_nulls, *cudf::make_product_aggregation<reduce_aggregation>())
       .first,
     expected_null_value);
   EXPECT_FALSE(this
                  ->template reduction_test<T>(
-                   col_nulls, *init_scalar, cudf::make_product_aggregation<reduce_aggregation>())
+                   col_nulls, *init_scalar, *cudf::make_product_aggregation<reduce_aggregation>())
                  .second);
 }
 
@@ -365,11 +365,11 @@ TYPED_TEST(ReductionTest, SumOfSquare)
   cudf::test::fixed_width_column_wrapper<T> col(v.begin(), v.end());
   T expected_value = calc_reduction(v);
 
-  EXPECT_EQ(
-    this
-      ->template reduction_test<T>(col, cudf::make_sum_of_squares_aggregation<reduce_aggregation>())
-      .first,
-    expected_value);
+  EXPECT_EQ(this
+              ->template reduction_test<T>(
+                col, *cudf::make_sum_of_squares_aggregation<reduce_aggregation>())
+              .first,
+            expected_value);
 
   // test with nulls
   cudf::test::fixed_width_column_wrapper<T> col_nulls = construct_null_column(v, host_bools);
@@ -378,7 +378,7 @@ TYPED_TEST(ReductionTest, SumOfSquare)
 
   EXPECT_EQ(this
               ->template reduction_test<T>(
-                col_nulls, cudf::make_sum_of_squares_aggregation<reduce_aggregation>())
+                col_nulls, *cudf::make_sum_of_squares_aggregation<reduce_aggregation>())
               .first,
             expected_null_value);
 }
@@ -407,22 +407,22 @@ TYPED_TEST(ReductionAnyAllTest, AnyAllTrueTrue)
 
   EXPECT_EQ(this
               ->template reduction_test<bool>(
-                col, cudf::make_any_aggregation<reduce_aggregation>(), output_dtype)
+                col, *cudf::make_any_aggregation<reduce_aggregation>(), output_dtype)
               .first,
             expected);
   EXPECT_EQ(this
               ->template reduction_test<bool>(
-                col, cudf::make_all_aggregation<reduce_aggregation>(), output_dtype)
+                col, *cudf::make_all_aggregation<reduce_aggregation>(), output_dtype)
               .first,
             expected);
   EXPECT_EQ(this
               ->template reduction_test<bool>(
-                col, *init_scalar, cudf::make_any_aggregation<reduce_aggregation>(), output_dtype)
+                col, *init_scalar, *cudf::make_any_aggregation<reduce_aggregation>(), output_dtype)
               .first,
             expected);
   EXPECT_EQ(this
               ->template reduction_test<bool>(
-                col, *init_scalar, cudf::make_all_aggregation<reduce_aggregation>(), output_dtype)
+                col, *init_scalar, *cudf::make_all_aggregation<reduce_aggregation>(), output_dtype)
               .first,
             expected);
 
@@ -432,23 +432,23 @@ TYPED_TEST(ReductionAnyAllTest, AnyAllTrueTrue)
 
   EXPECT_EQ(this
               ->template reduction_test<bool>(
-                col_nulls, cudf::make_any_aggregation<reduce_aggregation>(), output_dtype)
+                col_nulls, *cudf::make_any_aggregation<reduce_aggregation>(), output_dtype)
               .first,
             expected);
   EXPECT_EQ(this
               ->template reduction_test<bool>(
-                col_nulls, cudf::make_all_aggregation<reduce_aggregation>(), output_dtype)
+                col_nulls, *cudf::make_all_aggregation<reduce_aggregation>(), output_dtype)
               .first,
             expected);
   EXPECT_FALSE(
     this
       ->template reduction_test<bool>(
-        col_nulls, *init_scalar, cudf::make_any_aggregation<reduce_aggregation>(), output_dtype)
+        col_nulls, *init_scalar, *cudf::make_any_aggregation<reduce_aggregation>(), output_dtype)
       .second);
   EXPECT_FALSE(
     this
       ->template reduction_test<bool>(
-        col_nulls, *init_scalar, cudf::make_all_aggregation<reduce_aggregation>(), output_dtype)
+        col_nulls, *init_scalar, *cudf::make_all_aggregation<reduce_aggregation>(), output_dtype)
       .second);
 }
 
@@ -470,22 +470,22 @@ TYPED_TEST(ReductionAnyAllTest, AnyAllFalseFalse)
 
   EXPECT_EQ(this
               ->template reduction_test<bool>(
-                col, cudf::make_any_aggregation<reduce_aggregation>(), output_dtype)
+                col, *cudf::make_any_aggregation<reduce_aggregation>(), output_dtype)
               .first,
             expected);
   EXPECT_EQ(this
               ->template reduction_test<bool>(
-                col, cudf::make_all_aggregation<reduce_aggregation>(), output_dtype)
+                col, *cudf::make_all_aggregation<reduce_aggregation>(), output_dtype)
               .first,
             expected);
   EXPECT_EQ(this
               ->template reduction_test<bool>(
-                col, *init_scalar, cudf::make_any_aggregation<reduce_aggregation>(), output_dtype)
+                col, *init_scalar, *cudf::make_any_aggregation<reduce_aggregation>(), output_dtype)
               .first,
             expected);
   EXPECT_EQ(this
               ->template reduction_test<bool>(
-                col, *init_scalar, cudf::make_all_aggregation<reduce_aggregation>(), output_dtype)
+                col, *init_scalar, *cudf::make_all_aggregation<reduce_aggregation>(), output_dtype)
               .first,
             expected);
 
@@ -495,23 +495,23 @@ TYPED_TEST(ReductionAnyAllTest, AnyAllFalseFalse)
 
   EXPECT_EQ(this
               ->template reduction_test<bool>(
-                col_nulls, cudf::make_any_aggregation<reduce_aggregation>(), output_dtype)
+                col_nulls, *cudf::make_any_aggregation<reduce_aggregation>(), output_dtype)
               .first,
             expected);
   EXPECT_EQ(this
               ->template reduction_test<bool>(
-                col_nulls, cudf::make_all_aggregation<reduce_aggregation>(), output_dtype)
+                col_nulls, *cudf::make_all_aggregation<reduce_aggregation>(), output_dtype)
               .first,
             expected);
   EXPECT_FALSE(
     this
       ->template reduction_test<bool>(
-        col_nulls, *init_scalar, cudf::make_any_aggregation<reduce_aggregation>(), output_dtype)
+        col_nulls, *init_scalar, *cudf::make_any_aggregation<reduce_aggregation>(), output_dtype)
       .second);
   EXPECT_FALSE(
     this
       ->template reduction_test<bool>(
-        col_nulls, *init_scalar, cudf::make_all_aggregation<reduce_aggregation>(), output_dtype)
+        col_nulls, *init_scalar, *cudf::make_all_aggregation<reduce_aggregation>(), output_dtype)
       .second);
 }
 
@@ -541,7 +541,7 @@ TYPED_TEST(MultiStepReductionTest, Mean)
 
   EXPECT_EQ(this
               ->template reduction_test<double>(col,
-                                                cudf::make_mean_aggregation<reduce_aggregation>(),
+                                                *cudf::make_mean_aggregation<reduce_aggregation>(),
                                                 cudf::data_type(cudf::type_id::FLOAT64))
               .first,
             expected_value);
@@ -556,7 +556,7 @@ TYPED_TEST(MultiStepReductionTest, Mean)
 
   EXPECT_EQ(this
               ->template reduction_test<double>(col_nulls,
-                                                cudf::make_mean_aggregation<reduce_aggregation>(),
+                                                *cudf::make_mean_aggregation<reduce_aggregation>(),
                                                 cudf::data_type(cudf::type_id::FLOAT64))
               .first,
             expected_value_nulls);
@@ -598,11 +598,11 @@ TYPED_TEST(MultiStepReductionTest, DISABLED_var_std)
   auto std_agg    = cudf::make_std_aggregation<reduce_aggregation>(ddof);
 
   EXPECT_EQ(
-    this->template reduction_test<double>(col, var_agg, cudf::data_type(cudf::type_id::FLOAT64))
+    this->template reduction_test<double>(col, *var_agg, cudf::data_type(cudf::type_id::FLOAT64))
       .first,
     var);
   EXPECT_EQ(
-    this->template reduction_test<double>(col, std_agg, cudf::data_type(cudf::type_id::FLOAT64))
+    this->template reduction_test<double>(col, *std_agg, cudf::data_type(cudf::type_id::FLOAT64))
       .first,
     std);
 
@@ -615,16 +615,16 @@ TYPED_TEST(MultiStepReductionTest, DISABLED_var_std)
   double var_nulls = calc_var(replaced_array, valid_count, ddof);
   double std_nulls = std::sqrt(var_nulls);
 
-  EXPECT_EQ(
-    this
-      ->template reduction_test<double>(col_nulls, var_agg, cudf::data_type(cudf::type_id::FLOAT64))
-      .first,
-    var_nulls);
-  EXPECT_EQ(
-    this
-      ->template reduction_test<double>(col_nulls, std_agg, cudf::data_type(cudf::type_id::FLOAT64))
-      .first,
-    std_nulls);
+  EXPECT_EQ(this
+              ->template reduction_test<double>(
+                col_nulls, *var_agg, cudf::data_type(cudf::type_id::FLOAT64))
+              .first,
+            var_nulls);
+  EXPECT_EQ(this
+              ->template reduction_test<double>(
+                col_nulls, *std_agg, cudf::data_type(cudf::type_id::FLOAT64))
+              .first,
+            std_nulls);
 }
 
 // ----------------------------------------------------------------------------
@@ -633,7 +633,7 @@ template <typename T>
 struct ReductionMultiStepErrorCheck : public ReductionTest<T> {
   void reduction_error_check(cudf::test::fixed_width_column_wrapper<T>& col,
                              bool succeeded_condition,
-                             std::unique_ptr<reduce_aggregation> const& agg,
+                             reduce_aggregation const& agg,
                              cudf::data_type output_dtype)
   {
     const cudf::column_view underlying_column = col;
@@ -685,14 +685,14 @@ TYPED_TEST(ReductionMultiStepErrorCheck, DISABLED_ErrorHandling)
     auto var_agg        = cudf::make_variance_aggregation<reduce_aggregation>(ddof);
     auto std_agg        = cudf::make_std_aggregation<reduce_aggregation>(ddof);
     this->reduction_error_check(
-      col, expect_succeed, cudf::make_mean_aggregation<reduce_aggregation>(), dtype);
-    this->reduction_error_check(col, expect_succeed, var_agg, dtype);
-    this->reduction_error_check(col, expect_succeed, std_agg, dtype);
+      col, expect_succeed, *cudf::make_mean_aggregation<reduce_aggregation>(), dtype);
+    this->reduction_error_check(col, expect_succeed, *var_agg, dtype);
+    this->reduction_error_check(col, expect_succeed, *std_agg, dtype);
 
     this->reduction_error_check(
-      col_nulls, expect_succeed, cudf::make_mean_aggregation<reduce_aggregation>(), dtype);
-    this->reduction_error_check(col_nulls, expect_succeed, var_agg, dtype);
-    this->reduction_error_check(col_nulls, expect_succeed, std_agg, dtype);
+      col_nulls, expect_succeed, *cudf::make_mean_aggregation<reduce_aggregation>(), dtype);
+    this->reduction_error_check(col_nulls, expect_succeed, *var_agg, dtype);
+    this->reduction_error_check(col_nulls, expect_succeed, *std_agg, dtype);
     return;
   };
 
@@ -706,7 +706,7 @@ struct ReductionDtypeTest : public cudf::test::BaseFixture {
   void reduction_test(std::vector<int>& int_values,
                       T_out expected_value,
                       bool succeeded_condition,
-                      std::unique_ptr<reduce_aggregation> const& agg,
+                      reduce_aggregation const& agg,
                       cudf::data_type out_dtype,
                       bool expected_overflow = false)
   {
@@ -739,7 +739,7 @@ TEST_F(ReductionDtypeTest, all_null_output)
     cudf::test::fixed_point_column_wrapper<int32_t>{{0, 0, 0}, {0, 0, 0}, numeric::scale_type{-2}}
       .release();
 
-  std::unique_ptr<cudf::scalar> result = cudf::reduce(*col, sum_agg, col->type());
+  std::unique_ptr<cudf::scalar> result = cudf::reduce(*col, *sum_agg, col->type());
   EXPECT_EQ(result->is_valid(), false);
   EXPECT_EQ(result->type().id(), col->type().id());
   EXPECT_EQ(result->type().scale(), col->type().scale());
@@ -757,27 +757,27 @@ TEST_F(ReductionDtypeTest, different_precision)
   this->reduction_test<int8_t, int8_t>(int_values,
                                        static_cast<int8_t>(expected_value),
                                        true,
-                                       sum_agg,
+                                       *sum_agg,
                                        cudf::data_type(cudf::type_id::INT8),
                                        expected_overflow);
 
   this->reduction_test<int8_t, int64_t>(int_values,
                                         static_cast<int64_t>(expected_value),
                                         true,
-                                        sum_agg,
+                                        *sum_agg,
                                         cudf::data_type(cudf::type_id::INT64));
 
   this->reduction_test<int8_t, double>(int_values,
                                        static_cast<double>(expected_value),
                                        true,
-                                       sum_agg,
+                                       *sum_agg,
                                        cudf::data_type(cudf::type_id::FLOAT64));
 
   // down cast (over flow)
   this->reduction_test<double, int8_t>(int_values,
                                        static_cast<int8_t>(expected_value),
                                        true,
-                                       sum_agg,
+                                       *sum_agg,
                                        cudf::data_type(cudf::type_id::INT8),
                                        expected_overflow);
 
@@ -785,7 +785,7 @@ TEST_F(ReductionDtypeTest, different_precision)
   this->reduction_test<double, int16_t>(int_values,
                                         static_cast<int16_t>(expected_value),
                                         true,
-                                        sum_agg,
+                                        *sum_agg,
                                         cudf::data_type(cudf::type_id::INT16));
 
   // not supported case:
@@ -794,21 +794,21 @@ TEST_F(ReductionDtypeTest, different_precision)
     int_values,
     cudf::timestamp_s{cudf::duration_s(expected_value)},
     false,
-    sum_agg,
+    *sum_agg,
     cudf::data_type(cudf::type_id::TIMESTAMP_SECONDS));
 
   this->reduction_test<cudf::timestamp_s, cudf::timestamp_ns>(
     int_values,
     cudf::timestamp_ns{cudf::duration_ns(expected_value)},
     false,
-    sum_agg,
+    *sum_agg,
     cudf::data_type(cudf::type_id::TIMESTAMP_NANOSECONDS));
 
   this->reduction_test<int8_t, cudf::timestamp_us>(
     int_values,
     cudf::timestamp_us{cudf::duration_us(expected_value)},
     false,
-    sum_agg,
+    *sum_agg,
     cudf::data_type(cudf::type_id::TIMESTAMP_MICROSECONDS));
 
   std::vector<bool> v = convert_values<bool>(int_values);
@@ -817,46 +817,49 @@ TEST_F(ReductionDtypeTest, different_precision)
   // it's an integer/float sum of ones and zeros.
   int expected = std::accumulate(v.begin(), v.end(), int{0});
 
-  this->reduction_test<bool, int8_t>(
-    int_values, static_cast<int8_t>(expected), true, sum_agg, cudf::data_type(cudf::type_id::INT8));
+  this->reduction_test<bool, int8_t>(int_values,
+                                     static_cast<int8_t>(expected),
+                                     true,
+                                     *sum_agg,
+                                     cudf::data_type(cudf::type_id::INT8));
   this->reduction_test<bool, int16_t>(int_values,
                                       static_cast<int16_t>(expected),
                                       true,
-                                      sum_agg,
+                                      *sum_agg,
                                       cudf::data_type(cudf::type_id::INT16));
   this->reduction_test<bool, int32_t>(int_values,
                                       static_cast<int32_t>(expected),
                                       true,
-                                      sum_agg,
+                                      *sum_agg,
                                       cudf::data_type(cudf::type_id::INT32));
   this->reduction_test<bool, int64_t>(int_values,
                                       static_cast<int64_t>(expected),
                                       true,
-                                      sum_agg,
+                                      *sum_agg,
                                       cudf::data_type(cudf::type_id::INT64));
   this->reduction_test<bool, float>(int_values,
                                     static_cast<float>(expected),
                                     true,
-                                    sum_agg,
+                                    *sum_agg,
                                     cudf::data_type(cudf::type_id::FLOAT32));
   this->reduction_test<bool, double>(int_values,
                                      static_cast<double>(expected),
                                      true,
-                                     sum_agg,
+                                     *sum_agg,
                                      cudf::data_type(cudf::type_id::FLOAT64));
 
   // make sure boolean arithmetic semantics are obeyed when reducing to a bool
   this->reduction_test<bool, bool>(
-    int_values, true, true, sum_agg, cudf::data_type(cudf::type_id::BOOL8));
+    int_values, true, true, *sum_agg, cudf::data_type(cudf::type_id::BOOL8));
 
   this->reduction_test<int32_t, bool>(
-    int_values, true, true, sum_agg, cudf::data_type(cudf::type_id::BOOL8));
+    int_values, true, true, *sum_agg, cudf::data_type(cudf::type_id::BOOL8));
 
   // cudf::timestamp_s and int64_t are not convertible types.
   this->reduction_test<cudf::timestamp_s, int64_t>(int_values,
                                                    static_cast<int64_t>(expected_value),
                                                    false,
-                                                   sum_agg,
+                                                   *sum_agg,
                                                    cudf::data_type(cudf::type_id::INT64));
 }
 
@@ -868,8 +871,10 @@ TEST_F(ReductionErrorTest, empty_column)
 {
   using T        = int32_t;
   auto statement = [](cudf::column_view const& col) {
-    std::unique_ptr<cudf::scalar> result = cudf::reduce(
-      col, cudf::make_sum_aggregation<reduce_aggregation>(), cudf::data_type(cudf::type_id::INT64));
+    std::unique_ptr<cudf::scalar> result =
+      cudf::reduce(col,
+                   *cudf::make_sum_aggregation<reduce_aggregation>(),
+                   cudf::data_type(cudf::type_id::INT64));
     EXPECT_EQ(result->is_valid(), false);
   };
 
@@ -935,11 +940,11 @@ TEST_P(ReductionParamTest, DISABLED_std_var)
   auto std_agg = cudf::make_std_aggregation<reduce_aggregation>(ddof);
 
   EXPECT_EQ(
-    this->template reduction_test<double>(col, var_agg, cudf::data_type(cudf::type_id::FLOAT64))
+    this->template reduction_test<double>(col, *var_agg, cudf::data_type(cudf::type_id::FLOAT64))
       .first,
     var);
   EXPECT_EQ(
-    this->template reduction_test<double>(col, std_agg, cudf::data_type(cudf::type_id::FLOAT64))
+    this->template reduction_test<double>(col, *std_agg, cudf::data_type(cudf::type_id::FLOAT64))
       .first,
     std);
 
@@ -953,16 +958,16 @@ TEST_P(ReductionParamTest, DISABLED_std_var)
   double var_nulls = calc_var(replaced_array, valid_count);
   double std_nulls = std::sqrt(var_nulls);
 
-  EXPECT_EQ(
-    this
-      ->template reduction_test<double>(col_nulls, var_agg, cudf::data_type(cudf::type_id::FLOAT64))
-      .first,
-    var_nulls);
-  EXPECT_EQ(
-    this
-      ->template reduction_test<double>(col_nulls, std_agg, cudf::data_type(cudf::type_id::FLOAT64))
-      .first,
-    std_nulls);
+  EXPECT_EQ(this
+              ->template reduction_test<double>(
+                col_nulls, *var_agg, cudf::data_type(cudf::type_id::FLOAT64))
+              .first,
+            var_nulls);
+  EXPECT_EQ(this
+              ->template reduction_test<double>(
+                col_nulls, *std_agg, cudf::data_type(cudf::type_id::FLOAT64))
+              .first,
+            std_nulls);
 }
 
 //-------------------------------------------------------------------
@@ -973,7 +978,7 @@ struct StringReductionTest : public cudf::test::BaseFixture,
   void reduction_test(cudf::column_view const& underlying_column,
                       std::string expected_value,
                       bool succeeded_condition,
-                      std::unique_ptr<reduce_aggregation> const& agg,
+                      reduce_aggregation const& agg,
                       cudf::data_type output_dtype = cudf::data_type{})
   {
     if (cudf::data_type{} == output_dtype) output_dtype = underlying_column.type();
@@ -986,7 +991,7 @@ struct StringReductionTest : public cudf::test::BaseFixture,
       if (!result1->is_valid())
         std::cout << "expected=" << expected_value << ",got=" << result1->to_string() << std::endl;
       EXPECT_EQ(expected_value, result1->to_string())
-        << (agg->kind == aggregation::MIN ? "MIN" : "MAX");
+        << (agg.kind == aggregation::MIN ? "MIN" : "MAX");
     };
 
     if (succeeded_condition) {
@@ -1000,7 +1005,7 @@ struct StringReductionTest : public cudf::test::BaseFixture,
                       std::string initial_value,
                       std::string expected_value,
                       bool succeeded_condition,
-                      std::unique_ptr<reduce_aggregation> const& agg,
+                      reduce_aggregation const& agg,
                       cudf::data_type output_dtype = cudf::data_type{})
   {
     if (cudf::data_type{} == output_dtype) output_dtype = underlying_column.type();
@@ -1015,7 +1020,7 @@ struct StringReductionTest : public cudf::test::BaseFixture,
       if (!result1->is_valid())
         std::cout << "expected=" << expected_value << ",got=" << result1->to_string() << std::endl;
       EXPECT_EQ(expected_value, result1->to_string())
-        << (agg->kind == aggregation::MIN ? "MIN" : "MAX");
+        << (agg.kind == aggregation::MIN ? "MIN" : "MAX");
     };
 
     if (succeeded_condition) {
@@ -1072,34 +1077,38 @@ TEST_P(StringReductionTest, MinMax)
 
   // MIN
   this->reduction_test(
-    col, expected_min_result, succeed, cudf::make_min_aggregation<reduce_aggregation>());
-  this->reduction_test(
-    col_nulls, expected_min_null_result, succeed, cudf::make_min_aggregation<reduce_aggregation>());
+    col, expected_min_result, succeed, *cudf::make_min_aggregation<reduce_aggregation>());
+  this->reduction_test(col_nulls,
+                       expected_min_null_result,
+                       succeed,
+                       *cudf::make_min_aggregation<reduce_aggregation>());
   this->reduction_test(col,
                        initial_value,
                        expected_min_init_result,
                        succeed,
-                       cudf::make_min_aggregation<reduce_aggregation>());
+                       *cudf::make_min_aggregation<reduce_aggregation>());
   this->reduction_test(col_nulls,
                        initial_value,
                        expected_min_init_null_result,
                        succeed,
-                       cudf::make_min_aggregation<reduce_aggregation>());
+                       *cudf::make_min_aggregation<reduce_aggregation>());
   // MAX
   this->reduction_test(
-    col, expected_max_result, succeed, cudf::make_max_aggregation<reduce_aggregation>());
-  this->reduction_test(
-    col_nulls, expected_max_null_result, succeed, cudf::make_max_aggregation<reduce_aggregation>());
+    col, expected_max_result, succeed, *cudf::make_max_aggregation<reduce_aggregation>());
+  this->reduction_test(col_nulls,
+                       expected_max_null_result,
+                       succeed,
+                       *cudf::make_max_aggregation<reduce_aggregation>());
   this->reduction_test(col,
                        initial_value,
                        expected_max_init_result,
                        succeed,
-                       cudf::make_max_aggregation<reduce_aggregation>());
+                       *cudf::make_max_aggregation<reduce_aggregation>());
   this->reduction_test(col_nulls,
                        initial_value,
                        expected_max_init_null_result,
                        succeed,
-                       cudf::make_max_aggregation<reduce_aggregation>());
+                       *cudf::make_max_aggregation<reduce_aggregation>());
 
   // MINMAX
   auto result = cudf::minmax(col);
@@ -1176,16 +1185,16 @@ TEST_F(StringReductionTest, AllNull)
 
   // MIN
   auto result =
-    cudf::reduce(col_nulls, cudf::make_min_aggregation<reduce_aggregation>(), output_dtype);
+    cudf::reduce(col_nulls, *cudf::make_min_aggregation<reduce_aggregation>(), output_dtype);
   EXPECT_FALSE(result->is_valid());
   result = cudf::reduce(
-    col_nulls, cudf::make_min_aggregation<reduce_aggregation>(), output_dtype, *initial_value);
+    col_nulls, *cudf::make_min_aggregation<reduce_aggregation>(), output_dtype, *initial_value);
   EXPECT_FALSE(result->is_valid());
   // MAX
-  result = cudf::reduce(col_nulls, cudf::make_max_aggregation<reduce_aggregation>(), output_dtype);
+  result = cudf::reduce(col_nulls, *cudf::make_max_aggregation<reduce_aggregation>(), output_dtype);
   EXPECT_FALSE(result->is_valid());
   result = cudf::reduce(
-    col_nulls, cudf::make_max_aggregation<reduce_aggregation>(), output_dtype, *initial_value);
+    col_nulls, *cudf::make_max_aggregation<reduce_aggregation>(), output_dtype, *initial_value);
   EXPECT_FALSE(result->is_valid());
   // MINMAX
   auto mm_result = cudf::minmax(col_nulls);
@@ -1209,7 +1218,7 @@ TYPED_TEST(ReductionTest, Median)
     return 13.5;
   }();
   EXPECT_EQ(
-    this->template reduction_test<double>(col, cudf::make_median_aggregation<reduce_aggregation>())
+    this->template reduction_test<double>(col, *cudf::make_median_aggregation<reduce_aggregation>())
       .first,
     expected_value);
 
@@ -1220,8 +1229,8 @@ TYPED_TEST(ReductionTest, Median)
     return 14.0;
   }();
   EXPECT_EQ(this
-              ->template reduction_test<double>(col_odd,
-                                                cudf::make_median_aggregation<reduce_aggregation>())
+              ->template reduction_test<double>(
+                col_odd, *cudf::make_median_aggregation<reduce_aggregation>())
               .first,
             expected_value_odd);
 
@@ -1234,8 +1243,8 @@ TYPED_TEST(ReductionTest, Median)
   }();
 
   EXPECT_EQ(this
-              ->template reduction_test<double>(col_nulls,
-                                                cudf::make_median_aggregation<reduce_aggregation>())
+              ->template reduction_test<double>(
+                col_nulls, *cudf::make_median_aggregation<reduce_aggregation>())
               .first,
             expected_null_value);
 
@@ -1246,8 +1255,8 @@ TYPED_TEST(ReductionTest, Median)
     return 13.5;
   }();
   EXPECT_EQ(this
-              ->template reduction_test<double>(col_nulls_odd,
-                                                cudf::make_median_aggregation<reduce_aggregation>())
+              ->template reduction_test<double>(
+                col_nulls_odd, *cudf::make_median_aggregation<reduce_aggregation>())
               .first,
             expected_null_value_odd);
 }
@@ -1266,14 +1275,14 @@ TYPED_TEST(ReductionTest, Quantile)
   double expected_value0 = std::is_same_v<T, bool> || std::is_unsigned_v<T> ? v[4] : v[6];
   EXPECT_EQ(this
               ->template reduction_test<double>(
-                col, cudf::make_quantile_aggregation<reduce_aggregation>({0.0}, interp))
+                col, *cudf::make_quantile_aggregation<reduce_aggregation>({0.0}, interp))
               .first,
             expected_value0);
 
   double expected_value1 = v[3];
   EXPECT_EQ(this
               ->template reduction_test<double>(
-                col, cudf::make_quantile_aggregation<reduce_aggregation>({1.0}, interp))
+                col, *cudf::make_quantile_aggregation<reduce_aggregation>({1.0}, interp))
               .first,
             expected_value1);
 
@@ -1283,12 +1292,12 @@ TYPED_TEST(ReductionTest, Quantile)
 
   EXPECT_EQ(this
               ->template reduction_test<double>(
-                col_nulls, cudf::make_quantile_aggregation<reduce_aggregation>({0}, interp))
+                col_nulls, *cudf::make_quantile_aggregation<reduce_aggregation>({0}, interp))
               .first,
             expected_value0);
   EXPECT_EQ(this
               ->template reduction_test<double>(
-                col_nulls, cudf::make_quantile_aggregation<reduce_aggregation>({1}, interp))
+                col_nulls, *cudf::make_quantile_aggregation<reduce_aggregation>({1}, interp))
               .first,
             expected_null_value1);
 }
@@ -1303,16 +1312,18 @@ TYPED_TEST(ReductionTest, UniqueCount)
   // test without nulls
   cudf::test::fixed_width_column_wrapper<T> col(v.begin(), v.end());
   cudf::size_type expected_value = std::is_same_v<T, bool> ? 2 : 6;
-  EXPECT_EQ(this
-              ->template reduction_test<cudf::size_type>(
-                col, cudf::make_nunique_aggregation<reduce_aggregation>(cudf::null_policy::INCLUDE))
-              .first,
-            expected_value);
-  EXPECT_EQ(this
-              ->template reduction_test<cudf::size_type>(
-                col, cudf::make_nunique_aggregation<reduce_aggregation>(cudf::null_policy::EXCLUDE))
-              .first,
-            expected_value);
+  EXPECT_EQ(
+    this
+      ->template reduction_test<cudf::size_type>(
+        col, *cudf::make_nunique_aggregation<reduce_aggregation>(cudf::null_policy::INCLUDE))
+      .first,
+    expected_value);
+  EXPECT_EQ(
+    this
+      ->template reduction_test<cudf::size_type>(
+        col, *cudf::make_nunique_aggregation<reduce_aggregation>(cudf::null_policy::EXCLUDE))
+      .first,
+    expected_value);
 
   // test with nulls
   cudf::test::fixed_width_column_wrapper<T> col_nulls = construct_null_column(v, host_bools);
@@ -1322,13 +1333,13 @@ TYPED_TEST(ReductionTest, UniqueCount)
   EXPECT_EQ(
     this
       ->template reduction_test<cudf::size_type>(
-        col_nulls, cudf::make_nunique_aggregation<reduce_aggregation>(cudf::null_policy::INCLUDE))
+        col_nulls, *cudf::make_nunique_aggregation<reduce_aggregation>(cudf::null_policy::INCLUDE))
       .first,
     expected_null_value0);
   EXPECT_EQ(
     this
       ->template reduction_test<cudf::size_type>(
-        col_nulls, cudf::make_nunique_aggregation<reduce_aggregation>(cudf::null_policy::EXCLUDE))
+        col_nulls, *cudf::make_nunique_aggregation<reduce_aggregation>(cudf::null_policy::EXCLUDE))
       .first,
     expected_null_value1);
 }
@@ -1357,7 +1368,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionProductZeroScale)
   auto const out_type = static_cast<cudf::column_view>(column).type();
 
   auto const result =
-    cudf::reduce(column, cudf::make_product_aggregation<reduce_aggregation>(), out_type);
+    cudf::reduce(column, *cudf::make_product_aggregation<reduce_aggregation>(), out_type);
   auto const result_scalar = static_cast<cudf::scalar_type_t<decimalXX>*>(result.get());
   auto const result_fp     = decimalXX{result_scalar->value()};
 
@@ -1370,7 +1381,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionProductZeroScale)
   auto const init_scalar = cudf::make_fixed_point_scalar<decimalXX>(2, scale_type{0});
 
   auto const init_result = cudf::reduce(
-    column, cudf::make_product_aggregation<reduce_aggregation>(), out_type, *init_scalar);
+    column, *cudf::make_product_aggregation<reduce_aggregation>(), out_type, *init_scalar);
   auto const init_result_scalar = static_cast<cudf::scalar_type_t<decimalXX>*>(init_result.get());
   auto const init_result_fp     = decimalXX{init_result_scalar->value()};
 
@@ -1392,7 +1403,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionProduct)
     auto const expected = decimalXX{scaled_integer<RepType>{36, scale_type{i * 6}}};
 
     auto const result =
-      cudf::reduce(column, cudf::make_product_aggregation<reduce_aggregation>(), out_type);
+      cudf::reduce(column, *cudf::make_product_aggregation<reduce_aggregation>(), out_type);
     auto const result_scalar = static_cast<cudf::scalar_type_t<decimalXX>*>(result.get());
 
     EXPECT_EQ(result_scalar->fixed_point_value(), expected);
@@ -1402,7 +1413,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionProduct)
     auto const init_scalar   = cudf::make_fixed_point_scalar<decimalXX>(2, scale);
 
     auto const init_result = cudf::reduce(
-      column, cudf::make_product_aggregation<reduce_aggregation>(), out_type, *init_scalar);
+      column, *cudf::make_product_aggregation<reduce_aggregation>(), out_type, *init_scalar);
     auto const init_result_scalar = static_cast<cudf::scalar_type_t<decimalXX>*>(init_result.get());
 
     EXPECT_EQ(init_result_scalar->fixed_point_value(), init_expected);
@@ -1423,7 +1434,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionProductWithNulls)
     auto const expected = decimalXX{scaled_integer<RepType>{6, scale_type{i * 3}}};
 
     auto const result =
-      cudf::reduce(column, cudf::make_product_aggregation<reduce_aggregation>(), out_type);
+      cudf::reduce(column, *cudf::make_product_aggregation<reduce_aggregation>(), out_type);
     auto const result_scalar = static_cast<cudf::scalar_type_t<decimalXX>*>(result.get());
 
     EXPECT_EQ(result_scalar->fixed_point_value(), expected);
@@ -1433,7 +1444,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionProductWithNulls)
     auto const init_scalar   = cudf::make_fixed_point_scalar<decimalXX>(2, scale);
 
     auto const init_result = cudf::reduce(
-      column, cudf::make_product_aggregation<reduce_aggregation>(), out_type, *init_scalar);
+      column, *cudf::make_product_aggregation<reduce_aggregation>(), out_type, *init_scalar);
     auto const init_result_scalar = static_cast<cudf::scalar_type_t<decimalXX>*>(init_result.get());
 
     EXPECT_EQ(init_result_scalar->fixed_point_value(), init_expected);
@@ -1455,7 +1466,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionSum)
     auto const out_type = static_cast<cudf::column_view>(column).type();
 
     auto const result =
-      cudf::reduce(column, cudf::make_sum_aggregation<reduce_aggregation>(), out_type);
+      cudf::reduce(column, *cudf::make_sum_aggregation<reduce_aggregation>(), out_type);
     auto const result_scalar = static_cast<cudf::scalar_type_t<decimalXX>*>(result.get());
 
     EXPECT_EQ(result_scalar->fixed_point_value(), expected);
@@ -1465,7 +1476,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionSum)
     auto const init_scalar   = cudf::make_fixed_point_scalar<decimalXX>(2, scale);
 
     auto const init_result = cudf::reduce(
-      column, cudf::make_sum_aggregation<reduce_aggregation>(), out_type, *init_scalar);
+      column, *cudf::make_sum_aggregation<reduce_aggregation>(), out_type, *init_scalar);
     auto const init_result_scalar = static_cast<cudf::scalar_type_t<decimalXX>*>(init_result.get());
 
     EXPECT_EQ(init_result_scalar->fixed_point_value(), init_expected);
@@ -1491,7 +1502,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionSumAlternate)
   auto const out_type = static_cast<cudf::column_view>(column).type();
 
   auto const result =
-    cudf::reduce(column, cudf::make_sum_aggregation<reduce_aggregation>(), out_type);
+    cudf::reduce(column, *cudf::make_sum_aggregation<reduce_aggregation>(), out_type);
   auto const result_scalar = static_cast<cudf::scalar_type_t<decimalXX>*>(result.get());
 
   EXPECT_EQ(result_scalar->fixed_point_value(), expected);
@@ -1502,7 +1513,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionSumAlternate)
   auto const init_scalar   = cudf::make_fixed_point_scalar<decimalXX>(2, scale_type{0});
 
   auto const init_result =
-    cudf::reduce(column, cudf::make_sum_aggregation<reduce_aggregation>(), out_type, *init_scalar);
+    cudf::reduce(column, *cudf::make_sum_aggregation<reduce_aggregation>(), out_type, *init_scalar);
   auto const init_result_scalar = static_cast<cudf::scalar_type_t<decimalXX>*>(init_result.get());
 
   EXPECT_EQ(init_result_scalar->fixed_point_value(), init_expected);
@@ -1523,7 +1534,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionSumFractional)
     auto const expected = decimalXX{scaled_integer<RepType>{666, scale}};
 
     auto const result =
-      cudf::reduce(column, cudf::make_sum_aggregation<reduce_aggregation>(), out_type);
+      cudf::reduce(column, *cudf::make_sum_aggregation<reduce_aggregation>(), out_type);
     auto const result_scalar = static_cast<cudf::scalar_type_t<decimalXX>*>(result.get());
 
     EXPECT_EQ(result_scalar->fixed_point_value(), expected);
@@ -1533,7 +1544,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionSumFractional)
     auto const init_scalar   = cudf::make_fixed_point_scalar<decimalXX>(2, scale);
 
     auto const init_result = cudf::reduce(
-      column, cudf::make_sum_aggregation<reduce_aggregation>(), out_type, *init_scalar);
+      column, *cudf::make_sum_aggregation<reduce_aggregation>(), out_type, *init_scalar);
     auto const init_result_scalar = static_cast<cudf::scalar_type_t<decimalXX>*>(init_result.get());
 
     EXPECT_EQ(init_result_scalar->fixed_point_value(), init_expected);
@@ -1557,7 +1568,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionSumLarge)
     auto const expected       = decimalXX{scaled_integer<RepType>{expected_value, scale}};
 
     auto const result =
-      cudf::reduce(column, cudf::make_sum_aggregation<reduce_aggregation>(), out_type);
+      cudf::reduce(column, *cudf::make_sum_aggregation<reduce_aggregation>(), out_type);
     auto const result_scalar = static_cast<cudf::scalar_type_t<decimalXX>*>(result.get());
 
     EXPECT_EQ(result_scalar->fixed_point_value(), expected);
@@ -1570,7 +1581,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionSumLarge)
     auto const init_scalar   = cudf::make_fixed_point_scalar<decimalXX>(init_value, scale);
 
     auto const init_result = cudf::reduce(
-      column, cudf::make_sum_aggregation<reduce_aggregation>(), out_type, *init_scalar);
+      column, *cudf::make_sum_aggregation<reduce_aggregation>(), out_type, *init_scalar);
     auto const init_result_scalar = static_cast<cudf::scalar_type_t<decimalXX>*>(init_result.get());
 
     EXPECT_EQ(init_result_scalar->fixed_point_value(), init_expected);
@@ -1591,7 +1602,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionMin)
     auto const out_type = static_cast<cudf::column_view>(column).type();
 
     auto const result =
-      cudf::reduce(column, cudf::make_min_aggregation<reduce_aggregation>(), out_type);
+      cudf::reduce(column, *cudf::make_min_aggregation<reduce_aggregation>(), out_type);
     auto const result_scalar = static_cast<cudf::scalar_type_t<decimalXX>*>(result.get());
 
     EXPECT_EQ(result_scalar->fixed_point_value(), ONE);
@@ -1601,7 +1612,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionMin)
     auto const init_scalar   = cudf::make_fixed_point_scalar<decimalXX>(0, scale);
 
     auto const init_result = cudf::reduce(
-      column, cudf::make_min_aggregation<reduce_aggregation>(), out_type, *init_scalar);
+      column, *cudf::make_min_aggregation<reduce_aggregation>(), out_type, *init_scalar);
     auto const init_result_scalar = static_cast<cudf::scalar_type_t<decimalXX>*>(init_result.get());
 
     EXPECT_EQ(init_result_scalar->fixed_point_value(), init_expected);
@@ -1623,7 +1634,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionMinLarge)
     auto const expected = decimalXX{0, scale};
 
     auto const result =
-      cudf::reduce(column, cudf::make_min_aggregation<reduce_aggregation>(), out_type);
+      cudf::reduce(column, *cudf::make_min_aggregation<reduce_aggregation>(), out_type);
     auto const result_scalar = static_cast<cudf::scalar_type_t<decimalXX>*>(result.get());
 
     EXPECT_EQ(result_scalar->fixed_point_value(), expected);
@@ -1633,7 +1644,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionMinLarge)
     auto const init_scalar   = cudf::make_fixed_point_scalar<decimalXX>(0, scale);
 
     auto const init_result = cudf::reduce(
-      column, cudf::make_min_aggregation<reduce_aggregation>(), out_type, *init_scalar);
+      column, *cudf::make_min_aggregation<reduce_aggregation>(), out_type, *init_scalar);
     auto const init_result_scalar = static_cast<cudf::scalar_type_t<decimalXX>*>(init_result.get());
 
     EXPECT_EQ(init_result_scalar->fixed_point_value(), init_expected);
@@ -1654,7 +1665,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionMax)
     auto const out_type = static_cast<cudf::column_view>(column).type();
 
     auto const result =
-      cudf::reduce(column, cudf::make_max_aggregation<reduce_aggregation>(), out_type);
+      cudf::reduce(column, *cudf::make_max_aggregation<reduce_aggregation>(), out_type);
     auto const result_scalar = static_cast<cudf::scalar_type_t<decimalXX>*>(result.get());
 
     EXPECT_EQ(result_scalar->fixed_point_value(), FOUR);
@@ -1664,7 +1675,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionMax)
     auto const init_scalar   = cudf::make_fixed_point_scalar<decimalXX>(5, scale);
 
     auto const init_result = cudf::reduce(
-      column, cudf::make_max_aggregation<reduce_aggregation>(), out_type, *init_scalar);
+      column, *cudf::make_max_aggregation<reduce_aggregation>(), out_type, *init_scalar);
     auto const init_result_scalar = static_cast<cudf::scalar_type_t<decimalXX>*>(init_result.get());
 
     EXPECT_EQ(init_result_scalar->fixed_point_value(), init_expected);
@@ -1686,7 +1697,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionMaxLarge)
     auto const expected = decimalXX{scaled_integer<RepType>{42, scale}};
 
     auto const result =
-      cudf::reduce(column, cudf::make_max_aggregation<reduce_aggregation>(), out_type);
+      cudf::reduce(column, *cudf::make_max_aggregation<reduce_aggregation>(), out_type);
     auto const result_scalar = static_cast<cudf::scalar_type_t<decimalXX>*>(result.get());
 
     EXPECT_EQ(result_scalar->fixed_point_value(), expected);
@@ -1696,7 +1707,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionMaxLarge)
     auto const init_scalar   = cudf::make_fixed_point_scalar<decimalXX>(43, scale);
 
     auto const init_result = cudf::reduce(
-      column, cudf::make_max_aggregation<reduce_aggregation>(), out_type, *init_scalar);
+      column, *cudf::make_max_aggregation<reduce_aggregation>(), out_type, *init_scalar);
     auto const init_result_scalar = static_cast<cudf::scalar_type_t<decimalXX>*>(init_result.get());
 
     EXPECT_EQ(init_result_scalar->fixed_point_value(), init_expected);
@@ -1716,7 +1727,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionNUnique)
     auto const out_type = static_cast<cudf::column_view>(column).type();
 
     auto const result =
-      cudf::reduce(column, cudf::make_nunique_aggregation<reduce_aggregation>(), out_type);
+      cudf::reduce(column, *cudf::make_nunique_aggregation<reduce_aggregation>(), out_type);
     auto const result_scalar = static_cast<cudf::scalar_type_t<cudf::size_type>*>(result.get());
 
     EXPECT_EQ(result_scalar->value(), 4);
@@ -1737,7 +1748,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionSumOfSquares)
     auto const expected = decimalXX{scaled_integer<RepType>{30, scale_type{i * 2}}};
 
     auto const result =
-      cudf::reduce(column, cudf::make_sum_of_squares_aggregation<reduce_aggregation>(), out_type);
+      cudf::reduce(column, *cudf::make_sum_of_squares_aggregation<reduce_aggregation>(), out_type);
     auto const result_scalar = static_cast<cudf::scalar_type_t<decimalXX>*>(result.get());
 
     EXPECT_EQ(result_scalar->fixed_point_value(), expected);
@@ -1758,7 +1769,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionMedianOddNumberOfElements)
     auto const expected = decimalXX{scaled_integer<RepType>{2, scale}};
 
     auto const result =
-      cudf::reduce(column, cudf::make_median_aggregation<reduce_aggregation>(), out_type);
+      cudf::reduce(column, *cudf::make_median_aggregation<reduce_aggregation>(), out_type);
     auto const result_scalar = static_cast<cudf::scalar_type_t<decimalXX>*>(result.get());
 
     EXPECT_EQ(result_scalar->fixed_point_value(), expected);
@@ -1779,7 +1790,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionMedianEvenNumberOfElements)
     auto const expected = decimalXX{scaled_integer<RepType>{25, scale}};
 
     auto const result =
-      cudf::reduce(column, cudf::make_median_aggregation<reduce_aggregation>(), out_type);
+      cudf::reduce(column, *cudf::make_median_aggregation<reduce_aggregation>(), out_type);
     auto const result_scalar = static_cast<cudf::scalar_type_t<decimalXX>*>(result.get());
 
     EXPECT_EQ(result_scalar->fixed_point_value(), expected);
@@ -1799,11 +1810,11 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionQuantile)
     auto const out_type = static_cast<cudf::column_view>(column).type();
 
     for (auto const i : {0, 1, 2, 3, 4}) {
-      auto const expected = decimalXX{scaled_integer<RepType>{i + 1, scale}};
-      auto const result   = cudf::reduce(
-        column,
-        cudf::make_quantile_aggregation<reduce_aggregation>({i / 4.0}, cudf::interpolation::LINEAR),
-        out_type);
+      auto const expected      = decimalXX{scaled_integer<RepType>{i + 1, scale}};
+      auto const result        = cudf::reduce(column,
+                                       *cudf::make_quantile_aggregation<reduce_aggregation>(
+                                         {i / 4.0}, cudf::interpolation::LINEAR),
+                                       out_type);
       auto const result_scalar = static_cast<cudf::scalar_type_t<decimalXX>*>(result.get());
       EXPECT_EQ(result_scalar->fixed_point_value(), expected);
     }
@@ -1827,7 +1838,7 @@ TYPED_TEST(FixedPointTestAllReps, FixedPointReductionNthElement)
       auto const expected = decimalXX{scaled_integer<RepType>{values[i], scale}};
       auto const result   = cudf::reduce(
         column,
-        cudf::make_nth_element_aggregation<reduce_aggregation>(i, cudf::null_policy::INCLUDE),
+        *cudf::make_nth_element_aggregation<reduce_aggregation>(i, cudf::null_policy::INCLUDE),
         out_type);
       auto const result_scalar = static_cast<cudf::scalar_type_t<decimalXX>*>(result.get());
       EXPECT_EQ(result_scalar->fixed_point_value(), expected);
@@ -1851,7 +1862,7 @@ TEST_F(Decimal128Only, Decimal128ProductReduction)
 
     auto const out_type = cudf::data_type{cudf::type_id::DECIMAL128, scale};
     auto const result =
-      cudf::reduce(column, cudf::make_product_aggregation<reduce_aggregation>(), out_type);
+      cudf::reduce(column, *cudf::make_product_aggregation<reduce_aggregation>(), out_type);
     auto const result_scalar = static_cast<cudf::scalar_type_t<decimal128>*>(result.get());
 
     EXPECT_EQ(result_scalar->fixed_point_value(), expected);
@@ -1861,7 +1872,7 @@ TEST_F(Decimal128Only, Decimal128ProductReduction)
     auto const init_scalar   = cudf::make_fixed_point_scalar<decimal128>(2, scale);
 
     auto const init_result = cudf::reduce(
-      column, cudf::make_product_aggregation<reduce_aggregation>(), out_type, *init_scalar);
+      column, *cudf::make_product_aggregation<reduce_aggregation>(), out_type, *init_scalar);
     auto const init_result_scalar =
       static_cast<cudf::scalar_type_t<decimal128>*>(init_result.get());
 
@@ -1882,7 +1893,7 @@ TEST_F(Decimal128Only, Decimal128ProductReduction2)
 
     auto const out_type = cudf::data_type{cudf::type_id::DECIMAL128, scale};
     auto const result =
-      cudf::reduce(column, cudf::make_product_aggregation<reduce_aggregation>(), out_type);
+      cudf::reduce(column, *cudf::make_product_aggregation<reduce_aggregation>(), out_type);
     auto const result_scalar = static_cast<cudf::scalar_type_t<decimal128>*>(result.get());
 
     EXPECT_EQ(result_scalar->fixed_point_value(), expected);
@@ -1892,7 +1903,7 @@ TEST_F(Decimal128Only, Decimal128ProductReduction2)
     auto const init_scalar   = cudf::make_fixed_point_scalar<decimal128>(3, scale);
 
     auto const init_result = cudf::reduce(
-      column, cudf::make_product_aggregation<reduce_aggregation>(), out_type, *init_scalar);
+      column, *cudf::make_product_aggregation<reduce_aggregation>(), out_type, *init_scalar);
     auto const init_result_scalar =
       static_cast<cudf::scalar_type_t<decimal128>*>(init_result.get());
 
@@ -1914,7 +1925,7 @@ TEST_F(Decimal128Only, Decimal128ProductReduction3)
 
   auto const out_type = cudf::data_type{cudf::type_id::DECIMAL128, scale};
   auto const result =
-    cudf::reduce(column, cudf::make_product_aggregation<reduce_aggregation>(), out_type);
+    cudf::reduce(column, *cudf::make_product_aggregation<reduce_aggregation>(), out_type);
   auto const result_scalar = static_cast<cudf::scalar_type_t<decimal128>*>(result.get());
 
   EXPECT_EQ(result_scalar->fixed_point_value(), expected);
@@ -1923,7 +1934,7 @@ TEST_F(Decimal128Only, Decimal128ProductReduction3)
   auto const init_scalar = cudf::make_fixed_point_scalar<decimal128>(5, scale);
 
   auto const init_result = cudf::reduce(
-    column, cudf::make_product_aggregation<reduce_aggregation>(), out_type, *init_scalar);
+    column, *cudf::make_product_aggregation<reduce_aggregation>(), out_type, *init_scalar);
   auto const init_result_scalar = static_cast<cudf::scalar_type_t<decimal128>*>(init_result.get());
 
   EXPECT_EQ(init_result_scalar->fixed_point_value(), expected);
@@ -1955,21 +1966,23 @@ TYPED_TEST(ReductionTest, NthElement)
     auto const index         = mod(n, v.size());
     T expected_value_nonull  = v[index];
     bool const expected_null = host_bools[index];
-    EXPECT_EQ(this
-                ->template reduction_test<T>(col,
-                                             cudf::make_nth_element_aggregation<reduce_aggregation>(
-                                               n, cudf::null_policy::INCLUDE))
-                .first,
-              expected_value_nonull);
-    EXPECT_EQ(this
-                ->template reduction_test<T>(col,
-                                             cudf::make_nth_element_aggregation<reduce_aggregation>(
-                                               n, cudf::null_policy::EXCLUDE))
-                .first,
-              expected_value_nonull);
+    EXPECT_EQ(
+      this
+        ->template reduction_test<T>(
+          col,
+          *cudf::make_nth_element_aggregation<reduce_aggregation>(n, cudf::null_policy::INCLUDE))
+        .first,
+      expected_value_nonull);
+    EXPECT_EQ(
+      this
+        ->template reduction_test<T>(
+          col,
+          *cudf::make_nth_element_aggregation<reduce_aggregation>(n, cudf::null_policy::EXCLUDE))
+        .first,
+      expected_value_nonull);
     auto res = this->template reduction_test<T>(
       col_nulls,
-      cudf::make_nth_element_aggregation<reduce_aggregation>(n, cudf::null_policy::INCLUDE));
+      *cudf::make_nth_element_aggregation<reduce_aggregation>(n, cudf::null_policy::INCLUDE));
     EXPECT_EQ(res.first, expected_value_nonull);
     EXPECT_EQ(res.second, expected_null);
   }
@@ -1977,25 +1990,26 @@ TYPED_TEST(ReductionTest, NthElement)
   for (cudf::size_type n :
        {-valid_count, -valid_count / 2, -2, -1, 0, 1, 2, valid_count / 2, valid_count - 1}) {
     T expected_value_null = v_valid[mod(n, v_valid.size())];
-    EXPECT_EQ(this
-                ->template reduction_test<T>(col_nulls,
-                                             cudf::make_nth_element_aggregation<reduce_aggregation>(
-                                               n, cudf::null_policy::EXCLUDE))
-                .first,
-              expected_value_null);
+    EXPECT_EQ(
+      this
+        ->template reduction_test<T>(
+          col_nulls,
+          *cudf::make_nth_element_aggregation<reduce_aggregation>(n, cudf::null_policy::EXCLUDE))
+        .first,
+      expected_value_null);
   }
   // error cases
   for (cudf::size_type n : {-input_size - 1, input_size}) {
     EXPECT_ANY_THROW(this->template reduction_test<T>(
-      col, cudf::make_nth_element_aggregation<reduce_aggregation>(n, cudf::null_policy::INCLUDE)));
+      col, *cudf::make_nth_element_aggregation<reduce_aggregation>(n, cudf::null_policy::INCLUDE)));
     EXPECT_ANY_THROW(this->template reduction_test<T>(
       col_nulls,
-      cudf::make_nth_element_aggregation<reduce_aggregation>(n, cudf::null_policy::INCLUDE)));
+      *cudf::make_nth_element_aggregation<reduce_aggregation>(n, cudf::null_policy::INCLUDE)));
     EXPECT_ANY_THROW(this->template reduction_test<T>(
-      col, cudf::make_nth_element_aggregation<reduce_aggregation>(n, cudf::null_policy::EXCLUDE)));
+      col, *cudf::make_nth_element_aggregation<reduce_aggregation>(n, cudf::null_policy::EXCLUDE)));
     EXPECT_ANY_THROW(this->template reduction_test<T>(
       col_nulls,
-      cudf::make_nth_element_aggregation<reduce_aggregation>(n, cudf::null_policy::EXCLUDE)));
+      *cudf::make_nth_element_aggregation<reduce_aggregation>(n, cudf::null_policy::EXCLUDE)));
   }
 }
 
@@ -2019,25 +2033,25 @@ TEST_P(DictionaryStringReductionTest, MinMax)
   this->reduction_test(col,
                        *(std::min_element(host_strings.begin(), host_strings.end())),
                        true,
-                       cudf::make_min_aggregation<reduce_aggregation>(),
+                       *cudf::make_min_aggregation<reduce_aggregation>(),
                        output_type);
   // sliced
   this->reduction_test(cudf::slice(col, {1, 7}).front(),
                        *(std::min_element(host_strings.begin() + 1, host_strings.begin() + 7)),
                        true,
-                       cudf::make_min_aggregation<reduce_aggregation>(),
+                       *cudf::make_min_aggregation<reduce_aggregation>(),
                        output_type);
   // MAX
   this->reduction_test(col,
                        *(std::max_element(host_strings.begin(), host_strings.end())),
                        true,
-                       cudf::make_max_aggregation<reduce_aggregation>(),
+                       *cudf::make_max_aggregation<reduce_aggregation>(),
                        output_type);
   // sliced
   this->reduction_test(cudf::slice(col, {1, 7}).front(),
                        *(std::max_element(host_strings.begin() + 1, host_strings.begin() + 7)),
                        true,
-                       cudf::make_max_aggregation<reduce_aggregation>(),
+                       *cudf::make_max_aggregation<reduce_aggregation>(),
                        output_type);
 }
 
@@ -2062,39 +2076,39 @@ TYPED_TEST(DictionaryAnyAllTest, AnyAll)
     cudf::test::dictionary_column_wrapper<T> all_col(v_all.begin(), v_all.end());
     EXPECT_TRUE(this
                   ->template reduction_test<bool>(
-                    all_col, cudf::make_any_aggregation<reduce_aggregation>(), output_dtype)
+                    all_col, *cudf::make_any_aggregation<reduce_aggregation>(), output_dtype)
                   .first);
     EXPECT_TRUE(this
                   ->template reduction_test<bool>(
-                    all_col, cudf::make_all_aggregation<reduce_aggregation>(), output_dtype)
+                    all_col, *cudf::make_all_aggregation<reduce_aggregation>(), output_dtype)
                   .first);
     cudf::test::dictionary_column_wrapper<T> none_col(v_none.begin(), v_none.end());
     EXPECT_FALSE(this
                    ->template reduction_test<bool>(
-                     none_col, cudf::make_any_aggregation<reduce_aggregation>(), output_dtype)
+                     none_col, *cudf::make_any_aggregation<reduce_aggregation>(), output_dtype)
                    .first);
     EXPECT_FALSE(this
                    ->template reduction_test<bool>(
-                     none_col, cudf::make_all_aggregation<reduce_aggregation>(), output_dtype)
+                     none_col, *cudf::make_all_aggregation<reduce_aggregation>(), output_dtype)
                    .first);
     cudf::test::dictionary_column_wrapper<T> some_col(v_some.begin(), v_some.end());
     EXPECT_TRUE(this
                   ->template reduction_test<bool>(
-                    some_col, cudf::make_any_aggregation<reduce_aggregation>(), output_dtype)
+                    some_col, *cudf::make_any_aggregation<reduce_aggregation>(), output_dtype)
                   .first);
     EXPECT_FALSE(this
                    ->template reduction_test<bool>(
-                     some_col, cudf::make_all_aggregation<reduce_aggregation>(), output_dtype)
+                     some_col, *cudf::make_all_aggregation<reduce_aggregation>(), output_dtype)
                    .first);
     // sliced test
     EXPECT_TRUE(this
                   ->template reduction_test<bool>(cudf::slice(some_col, {1, 3}).front(),
-                                                  cudf::make_any_aggregation<reduce_aggregation>(),
+                                                  *cudf::make_any_aggregation<reduce_aggregation>(),
                                                   output_dtype)
                   .first);
     EXPECT_TRUE(this
                   ->template reduction_test<bool>(cudf::slice(some_col, {1, 2}).front(),
-                                                  cudf::make_all_aggregation<reduce_aggregation>(),
+                                                  *cudf::make_all_aggregation<reduce_aggregation>(),
                                                   output_dtype)
                   .first);
   }
@@ -2104,39 +2118,39 @@ TYPED_TEST(DictionaryAnyAllTest, AnyAll)
     cudf::test::dictionary_column_wrapper<T> all_col(v_all.begin(), v_all.end(), valid.begin());
     EXPECT_TRUE(this
                   ->template reduction_test<bool>(
-                    all_col, cudf::make_any_aggregation<reduce_aggregation>(), output_dtype)
+                    all_col, *cudf::make_any_aggregation<reduce_aggregation>(), output_dtype)
                   .first);
     EXPECT_TRUE(this
                   ->template reduction_test<bool>(
-                    all_col, cudf::make_all_aggregation<reduce_aggregation>(), output_dtype)
+                    all_col, *cudf::make_all_aggregation<reduce_aggregation>(), output_dtype)
                   .first);
     cudf::test::dictionary_column_wrapper<T> none_col(v_none.begin(), v_none.end(), valid.begin());
     EXPECT_FALSE(this
                    ->template reduction_test<bool>(
-                     none_col, cudf::make_any_aggregation<reduce_aggregation>(), output_dtype)
+                     none_col, *cudf::make_any_aggregation<reduce_aggregation>(), output_dtype)
                    .first);
     EXPECT_FALSE(this
                    ->template reduction_test<bool>(
-                     none_col, cudf::make_all_aggregation<reduce_aggregation>(), output_dtype)
+                     none_col, *cudf::make_all_aggregation<reduce_aggregation>(), output_dtype)
                    .first);
     cudf::test::dictionary_column_wrapper<T> some_col(v_some.begin(), v_some.end(), valid.begin());
     EXPECT_TRUE(this
                   ->template reduction_test<bool>(
-                    some_col, cudf::make_any_aggregation<reduce_aggregation>(), output_dtype)
+                    some_col, *cudf::make_any_aggregation<reduce_aggregation>(), output_dtype)
                   .first);
     EXPECT_FALSE(this
                    ->template reduction_test<bool>(
-                     some_col, cudf::make_all_aggregation<reduce_aggregation>(), output_dtype)
+                     some_col, *cudf::make_all_aggregation<reduce_aggregation>(), output_dtype)
                    .first);
     // sliced test
     EXPECT_TRUE(this
                   ->template reduction_test<bool>(cudf::slice(some_col, {0, 3}).front(),
-                                                  cudf::make_any_aggregation<reduce_aggregation>(),
+                                                  *cudf::make_any_aggregation<reduce_aggregation>(),
                                                   output_dtype)
                   .first);
     EXPECT_TRUE(this
                   ->template reduction_test<bool>(cudf::slice(some_col, {1, 4}).front(),
-                                                  cudf::make_all_aggregation<reduce_aggregation>(),
+                                                  *cudf::make_all_aggregation<reduce_aggregation>(),
                                                   output_dtype)
                   .first);
   }
@@ -2160,7 +2174,7 @@ TYPED_TEST(DictionaryReductionTest, Sum)
   T expected_value = std::accumulate(v.begin(), v.end(), T{0});
   EXPECT_EQ(this
               ->template reduction_test<T>(
-                col, cudf::make_sum_aggregation<reduce_aggregation>(), output_type)
+                col, *cudf::make_sum_aggregation<reduce_aggregation>(), output_type)
               .first,
             expected_value);
 
@@ -2173,7 +2187,7 @@ TYPED_TEST(DictionaryReductionTest, Sum)
   }();
   EXPECT_EQ(this
               ->template reduction_test<T>(
-                col_nulls, cudf::make_sum_aggregation<reduce_aggregation>(), output_type)
+                col_nulls, *cudf::make_sum_aggregation<reduce_aggregation>(), output_type)
               .first,
             expected_value);
 }
@@ -2194,7 +2208,7 @@ TYPED_TEST(DictionaryReductionTest, Product)
 
   EXPECT_EQ(this
               ->template reduction_test<T>(
-                col, cudf::make_product_aggregation<reduce_aggregation>(), output_type)
+                col, *cudf::make_product_aggregation<reduce_aggregation>(), output_type)
               .first,
             calc_prod(v));
 
@@ -2204,7 +2218,7 @@ TYPED_TEST(DictionaryReductionTest, Product)
 
   EXPECT_EQ(this
               ->template reduction_test<T>(
-                col_nulls, cudf::make_product_aggregation<reduce_aggregation>(), output_type)
+                col_nulls, *cudf::make_product_aggregation<reduce_aggregation>(), output_type)
               .first,
             calc_prod(replace_nulls(v, validity, T{1})));
 }
@@ -2225,7 +2239,7 @@ TYPED_TEST(DictionaryReductionTest, SumOfSquare)
 
   EXPECT_EQ(this
               ->template reduction_test<T>(
-                col, cudf::make_sum_of_squares_aggregation<reduce_aggregation>(), output_type)
+                col, *cudf::make_sum_of_squares_aggregation<reduce_aggregation>(), output_type)
               .first,
             calc_reduction(v));
 
@@ -2233,11 +2247,12 @@ TYPED_TEST(DictionaryReductionTest, SumOfSquare)
   std::vector<bool> validity({1, 1, 0, 0, 1, 1, 1, 1});
   cudf::test::dictionary_column_wrapper<T> col_nulls(v.begin(), v.end(), validity.begin());
 
-  EXPECT_EQ(this
-              ->template reduction_test<T>(
-                col_nulls, cudf::make_sum_of_squares_aggregation<reduce_aggregation>(), output_type)
-              .first,
-            calc_reduction(replace_nulls(v, validity, T{0})));
+  EXPECT_EQ(
+    this
+      ->template reduction_test<T>(
+        col_nulls, *cudf::make_sum_of_squares_aggregation<reduce_aggregation>(), output_type)
+      .first,
+    calc_reduction(replace_nulls(v, validity, T{0})));
 }
 
 TYPED_TEST(DictionaryReductionTest, Mean)
@@ -2257,7 +2272,7 @@ TYPED_TEST(DictionaryReductionTest, Mean)
 
   EXPECT_EQ(this
               ->template reduction_test<double>(
-                col, cudf::make_mean_aggregation<reduce_aggregation>(), output_type)
+                col, *cudf::make_mean_aggregation<reduce_aggregation>(), output_type)
               .first,
             calc_mean(v, v.size()));
 
@@ -2269,7 +2284,7 @@ TYPED_TEST(DictionaryReductionTest, Mean)
 
   EXPECT_EQ(this
               ->template reduction_test<double>(
-                col_nulls, cudf::make_mean_aggregation<reduce_aggregation>(), output_type)
+                col_nulls, *cudf::make_mean_aggregation<reduce_aggregation>(), output_type)
               .first,
             calc_mean(replace_nulls(v, validity, T{0}), valid_count));
 }
@@ -2304,8 +2319,8 @@ TYPED_TEST(DictionaryReductionTest, DISABLED_VarStd)
   auto var_agg               = cudf::make_variance_aggregation<reduce_aggregation>(ddof);
   auto std_agg               = cudf::make_std_aggregation<reduce_aggregation>(ddof);
 
-  EXPECT_EQ(this->template reduction_test<double>(col, var_agg, output_type).first, var);
-  EXPECT_EQ(this->template reduction_test<double>(col, std_agg, output_type).first, std);
+  EXPECT_EQ(this->template reduction_test<double>(col, *var_agg, output_type).first, var);
+  EXPECT_EQ(this->template reduction_test<double>(col, *std_agg, output_type).first, std);
 
   // test with nulls
   std::vector<bool> validity({1, 1, 0, 1, 1, 1, 0, 1});
@@ -2316,9 +2331,9 @@ TYPED_TEST(DictionaryReductionTest, DISABLED_VarStd)
   double var_nulls = calc_var(replace_nulls(v, validity, T{0}), valid_count, ddof);
   double std_nulls = std::sqrt(var_nulls);
 
-  EXPECT_EQ(this->template reduction_test<double>(col_nulls, var_agg, output_type).first,
+  EXPECT_EQ(this->template reduction_test<double>(col_nulls, *var_agg, output_type).first,
             var_nulls);
-  EXPECT_EQ(this->template reduction_test<double>(col_nulls, std_agg, output_type).first,
+  EXPECT_EQ(this->template reduction_test<double>(col_nulls, *std_agg, output_type).first,
             std_nulls);
 }
 
@@ -2334,7 +2349,7 @@ TYPED_TEST(DictionaryReductionTest, NthElement)
   cudf::size_type n = 5;
   EXPECT_EQ(this
               ->template reduction_test<T>(col,
-                                           cudf::make_nth_element_aggregation<reduce_aggregation>(
+                                           *cudf::make_nth_element_aggregation<reduce_aggregation>(
                                              n, cudf::null_policy::INCLUDE),
                                            output_type)
               .first,
@@ -2346,7 +2361,7 @@ TYPED_TEST(DictionaryReductionTest, NthElement)
 
   EXPECT_EQ(this
               ->template reduction_test<T>(col_nulls,
-                                           cudf::make_nth_element_aggregation<reduce_aggregation>(
+                                           *cudf::make_nth_element_aggregation<reduce_aggregation>(
                                              n, cudf::null_policy::INCLUDE),
                                            output_type)
               .first,
@@ -2355,7 +2370,7 @@ TYPED_TEST(DictionaryReductionTest, NthElement)
     this
       ->template reduction_test<T>(
         col_nulls,
-        cudf::make_nth_element_aggregation<reduce_aggregation>(2, cudf::null_policy::INCLUDE),
+        *cudf::make_nth_element_aggregation<reduce_aggregation>(2, cudf::null_policy::INCLUDE),
         output_type)
       .second);
 }
@@ -2372,7 +2387,7 @@ TYPED_TEST(DictionaryReductionTest, UniqueCount)
   EXPECT_EQ(this
               ->template reduction_test<int>(
                 col,
-                cudf::make_nunique_aggregation<reduce_aggregation>(cudf::null_policy::INCLUDE),
+                *cudf::make_nunique_aggregation<reduce_aggregation>(cudf::null_policy::INCLUDE),
                 output_type)
               .first,
             6);
@@ -2384,14 +2399,14 @@ TYPED_TEST(DictionaryReductionTest, UniqueCount)
   EXPECT_EQ(this
               ->template reduction_test<int>(
                 col_nulls,
-                cudf::make_nunique_aggregation<reduce_aggregation>(cudf::null_policy::INCLUDE),
+                *cudf::make_nunique_aggregation<reduce_aggregation>(cudf::null_policy::INCLUDE),
                 output_type)
               .first,
             7);
   EXPECT_EQ(this
               ->template reduction_test<int>(
                 col_nulls,
-                cudf::make_nunique_aggregation<reduce_aggregation>(cudf::null_policy::EXCLUDE),
+                *cudf::make_nunique_aggregation<reduce_aggregation>(cudf::null_policy::EXCLUDE),
                 output_type)
               .first,
             6);
@@ -2402,12 +2417,11 @@ TYPED_TEST(DictionaryReductionTest, Median)
   using T = TypeParam;
   std::vector<int> int_values({6, -14, 13, 64, 0, -13, -20, 45});
   std::vector<T> v = convert_values<T>(int_values);
-  cudf::data_type output_type{cudf::type_to_id<double>()};
 
   // test without nulls
   cudf::test::dictionary_column_wrapper<T> col(v.begin(), v.end());
   EXPECT_EQ(
-    this->template reduction_test<double>(col, cudf::make_median_aggregation<reduce_aggregation>())
+    this->template reduction_test<double>(col, *cudf::make_median_aggregation<reduce_aggregation>())
       .first,
     (std::is_signed_v<T>) ? 3.0 : 13.5);
 
@@ -2415,8 +2429,8 @@ TYPED_TEST(DictionaryReductionTest, Median)
   std::vector<bool> validity({1, 1, 1, 0, 1, 1, 1, 1});
   cudf::test::dictionary_column_wrapper<T> col_nulls(v.begin(), v.end(), validity.begin());
   EXPECT_EQ(this
-              ->template reduction_test<double>(col_nulls,
-                                                cudf::make_median_aggregation<reduce_aggregation>())
+              ->template reduction_test<double>(
+                col_nulls, *cudf::make_median_aggregation<reduce_aggregation>())
               .first,
             (std::is_signed_v<T>) ? 0.0 : 13.0);
 }
@@ -2427,19 +2441,18 @@ TYPED_TEST(DictionaryReductionTest, Quantile)
   std::vector<int> int_values({6, -14, 13, 64, 0, -13, -20, 45});
   std::vector<T> v = convert_values<T>(int_values);
   cudf::interpolation interp{cudf::interpolation::LINEAR};
-  cudf::data_type output_type{cudf::type_to_id<double>()};
 
   // test without nulls
   cudf::test::dictionary_column_wrapper<T> col(v.begin(), v.end());
   double expected_value = std::is_same_v<T, bool> || std::is_unsigned_v<T> ? 0.0 : -20.0;
   EXPECT_EQ(this
               ->template reduction_test<double>(
-                col, cudf::make_quantile_aggregation<reduce_aggregation>({0.0}, interp))
+                col, *cudf::make_quantile_aggregation<reduce_aggregation>({0.0}, interp))
               .first,
             expected_value);
   EXPECT_EQ(this
               ->template reduction_test<double>(
-                col, cudf::make_quantile_aggregation<reduce_aggregation>({1.0}, interp))
+                col, *cudf::make_quantile_aggregation<reduce_aggregation>({1.0}, interp))
               .first,
             64.0);
 
@@ -2449,12 +2462,12 @@ TYPED_TEST(DictionaryReductionTest, Quantile)
 
   EXPECT_EQ(this
               ->template reduction_test<double>(
-                col_nulls, cudf::make_quantile_aggregation<reduce_aggregation>({0}, interp))
+                col_nulls, *cudf::make_quantile_aggregation<reduce_aggregation>({0}, interp))
               .first,
             expected_value);
   EXPECT_EQ(this
               ->template reduction_test<double>(
-                col_nulls, cudf::make_quantile_aggregation<reduce_aggregation>({1}, interp))
+                col_nulls, *cudf::make_quantile_aggregation<reduce_aggregation>({1}, interp))
               .first,
             45.0);
 }
@@ -2464,7 +2477,7 @@ struct ListReductionTest : public cudf::test::BaseFixture {
                       cudf::column_view const& expected_value,
                       bool succeeded_condition,
                       bool is_valid,
-                      std::unique_ptr<reduce_aggregation> const& agg)
+                      reduce_aggregation const& agg)
   {
     auto statement = [&]() {
       std::unique_ptr<cudf::scalar> result =
@@ -2498,7 +2511,7 @@ TEST_F(ListReductionTest, ListReductionNthElement)
     ElementCol{0, 5, -3},  // expected_value,
     true,
     true,
-    cudf::make_nth_element_aggregation<reduce_aggregation>(2, cudf::null_policy::INCLUDE));
+    *cudf::make_nth_element_aggregation<reduce_aggregation>(2, cudf::null_policy::INCLUDE));
 
   // test with null-exclude
   std::vector<bool> validity{1, 0, 0, 1, 1, 0};
@@ -2508,7 +2521,7 @@ TEST_F(ListReductionTest, ListReductionNthElement)
     ElementCol{-2},  // expected_value,
     true,
     true,
-    cudf::make_nth_element_aggregation<reduce_aggregation>(1, cudf::null_policy::EXCLUDE));
+    *cudf::make_nth_element_aggregation<reduce_aggregation>(1, cudf::null_policy::EXCLUDE));
 
   // test with null-include
   this->reduction_test(
@@ -2516,7 +2529,7 @@ TEST_F(ListReductionTest, ListReductionNthElement)
     ElementCol{},  // expected_value,
     true,
     false,
-    cudf::make_nth_element_aggregation<reduce_aggregation>(1, cudf::null_policy::INCLUDE));
+    *cudf::make_nth_element_aggregation<reduce_aggregation>(1, cudf::null_policy::INCLUDE));
 }
 
 TEST_F(ListReductionTest, NestedListReductionNthElement)
@@ -2533,7 +2546,7 @@ TEST_F(ListReductionTest, NestedListReductionNthElement)
     LCW{{}, {2, 3, 4}},  // expected_value,
     true,
     true,
-    cudf::make_nth_element_aggregation<reduce_aggregation>(0, cudf::null_policy::INCLUDE));
+    *cudf::make_nth_element_aggregation<reduce_aggregation>(0, cudf::null_policy::INCLUDE));
 
   // test with null-include
   this->reduction_test(
@@ -2541,7 +2554,7 @@ TEST_F(ListReductionTest, NestedListReductionNthElement)
     LCW{},  // expected_value,
     true,
     false,
-    cudf::make_nth_element_aggregation<reduce_aggregation>(2, cudf::null_policy::INCLUDE));
+    *cudf::make_nth_element_aggregation<reduce_aggregation>(2, cudf::null_policy::INCLUDE));
 
   // test with null-exclude
   this->reduction_test(
@@ -2549,7 +2562,7 @@ TEST_F(ListReductionTest, NestedListReductionNthElement)
     LCW{{11}, {12, 13}},  // expected_value,
     true,
     true,
-    cudf::make_nth_element_aggregation<reduce_aggregation>(2, cudf::null_policy::EXCLUDE));
+    *cudf::make_nth_element_aggregation<reduce_aggregation>(2, cudf::null_policy::EXCLUDE));
 }
 
 TEST_F(ListReductionTest, NonValidListReductionNthElement)
@@ -2564,7 +2577,7 @@ TEST_F(ListReductionTest, NonValidListReductionNthElement)
     ElementCol{},  // expected_value,
     true,
     false,
-    cudf::make_nth_element_aggregation<reduce_aggregation>(0, cudf::null_policy::INCLUDE));
+    *cudf::make_nth_element_aggregation<reduce_aggregation>(0, cudf::null_policy::INCLUDE));
 
   // test against empty input
   this->reduction_test(
@@ -2572,7 +2585,7 @@ TEST_F(ListReductionTest, NonValidListReductionNthElement)
     ElementCol{},  // expected_value,
     true,
     false,
-    cudf::make_nth_element_aggregation<reduce_aggregation>(0, cudf::null_policy::INCLUDE));
+    *cudf::make_nth_element_aggregation<reduce_aggregation>(0, cudf::null_policy::INCLUDE));
 }
 
 struct StructReductionTest : public cudf::test::BaseFixture {
@@ -2582,7 +2595,7 @@ struct StructReductionTest : public cudf::test::BaseFixture {
                       cudf::table_view const& expected_value,
                       bool succeeded_condition,
                       bool is_valid,
-                      std::unique_ptr<reduce_aggregation> const& agg)
+                      reduce_aggregation const& agg)
   {
     auto statement = [&]() {
       std::unique_ptr<cudf::scalar> result =
@@ -2622,7 +2635,7 @@ TEST_F(StructReductionTest, StructReductionNthElement)
     cudf::table_view{{result_col0, result_col1, result_col2}},  // expected_value,
     true,
     true,
-    cudf::make_nth_element_aggregation<reduce_aggregation>(2, cudf::null_policy::INCLUDE));
+    *cudf::make_nth_element_aggregation<reduce_aggregation>(2, cudf::null_policy::INCLUDE));
 
   // test with null-include
   std::vector<bool> validity{1, 1, 1, 0, 1, 0, 0, 1};
@@ -2639,7 +2652,7 @@ TEST_F(StructReductionTest, StructReductionNthElement)
     cudf::table_view{{result_col0, result_col1, result_col2}},  // expected_value,
     true,
     false,
-    cudf::make_nth_element_aggregation<reduce_aggregation>(6, cudf::null_policy::INCLUDE));
+    *cudf::make_nth_element_aggregation<reduce_aggregation>(6, cudf::null_policy::INCLUDE));
 
   // test with null-exclude
   result_col0 = ICW{{28}, {1}};
@@ -2650,7 +2663,7 @@ TEST_F(StructReductionTest, StructReductionNthElement)
     cudf::table_view{{result_col0, result_col1, result_col2}},  // expected_value,
     true,
     true,
-    cudf::make_nth_element_aggregation<reduce_aggregation>(4, cudf::null_policy::EXCLUDE));
+    *cudf::make_nth_element_aggregation<reduce_aggregation>(4, cudf::null_policy::EXCLUDE));
 }
 
 TEST_F(StructReductionTest, NestedStructReductionNthElement)
@@ -2673,7 +2686,7 @@ TEST_F(StructReductionTest, NestedStructReductionNthElement)
     cudf::table_view{{result_col0, result_col1, result_col2}},  // expected_value,
     true,
     true,
-    cudf::make_nth_element_aggregation<reduce_aggregation>(1, cudf::null_policy::INCLUDE));
+    *cudf::make_nth_element_aggregation<reduce_aggregation>(1, cudf::null_policy::INCLUDE));
 
   // test with null-include
   result_child0 = ICW{0};
@@ -2685,7 +2698,7 @@ TEST_F(StructReductionTest, NestedStructReductionNthElement)
     cudf::table_view{{result_col0, result_col1, result_col2}},  // expected_value,
     true,
     false,
-    cudf::make_nth_element_aggregation<reduce_aggregation>(3, cudf::null_policy::INCLUDE));
+    *cudf::make_nth_element_aggregation<reduce_aggregation>(3, cudf::null_policy::INCLUDE));
 
   // test with null-exclude
   result_child0 = ICW{0};
@@ -2697,7 +2710,7 @@ TEST_F(StructReductionTest, NestedStructReductionNthElement)
     cudf::table_view{{result_col0, result_col1, result_col2}},  // expected_value,
     true,
     true,
-    cudf::make_nth_element_aggregation<reduce_aggregation>(3, cudf::null_policy::EXCLUDE));
+    *cudf::make_nth_element_aggregation<reduce_aggregation>(3, cudf::null_policy::EXCLUDE));
 }
 
 TEST_F(StructReductionTest, NonValidStructReductionNthElement)
@@ -2717,7 +2730,7 @@ TEST_F(StructReductionTest, NonValidStructReductionNthElement)
     cudf::table_view{{ret_col0, ret_col1, ret_col2}},  // expected_value,
     true,
     false,
-    cudf::make_nth_element_aggregation<reduce_aggregation>(0, cudf::null_policy::INCLUDE));
+    *cudf::make_nth_element_aggregation<reduce_aggregation>(0, cudf::null_policy::INCLUDE));
 
   // test against empty input (would fail because we can not create empty struct scalar)
   child0     = ICW{};
@@ -2732,7 +2745,7 @@ TEST_F(StructReductionTest, NonValidStructReductionNthElement)
     cudf::table_view{{ret_col0, ret_col1, ret_col2}},  // expected_value,
     false,
     false,
-    cudf::make_nth_element_aggregation<reduce_aggregation>(0, cudf::null_policy::INCLUDE));
+    *cudf::make_nth_element_aggregation<reduce_aggregation>(0, cudf::null_policy::INCLUDE));
 }
 
 TEST_F(StructReductionTest, StructReductionMinMaxNoNull)
@@ -2754,7 +2767,7 @@ TEST_F(StructReductionTest, StructReductionMinMaxNoNull)
                          cudf::table_view{{expected_child1, expected_child2}},
                          true,
                          true,
-                         cudf::make_min_aggregation<reduce_aggregation>());
+                         *cudf::make_min_aggregation<reduce_aggregation>());
   }
 
   {
@@ -2764,7 +2777,7 @@ TEST_F(StructReductionTest, StructReductionMinMaxNoNull)
                          cudf::table_view{{expected_child1, expected_child2}},
                          true,
                          true,
-                         cudf::make_max_aggregation<reduce_aggregation>());
+                         *cudf::make_max_aggregation<reduce_aggregation>());
   }
 }
 
@@ -2802,7 +2815,7 @@ TEST_F(StructReductionTest, StructReductionMinMaxSlicedInput)
                          cudf::table_view{{expected_child1, expected_child2}},
                          true,
                          true,
-                         cudf::make_min_aggregation<reduce_aggregation>());
+                         *cudf::make_min_aggregation<reduce_aggregation>());
   }
 
   {
@@ -2812,7 +2825,7 @@ TEST_F(StructReductionTest, StructReductionMinMaxSlicedInput)
                          cudf::table_view{{expected_child1, expected_child2}},
                          true,
                          true,
-                         cudf::make_max_aggregation<reduce_aggregation>());
+                         *cudf::make_max_aggregation<reduce_aggregation>());
   }
 }
 
@@ -2851,7 +2864,7 @@ TEST_F(StructReductionTest, StructReductionMinMaxWithNulls)
                          cudf::table_view{{expected_child1, expected_child2}},
                          true,
                          true,
-                         cudf::make_min_aggregation<reduce_aggregation>());
+                         *cudf::make_min_aggregation<reduce_aggregation>());
   }
 
   {
@@ -2861,7 +2874,7 @@ TEST_F(StructReductionTest, StructReductionMinMaxWithNulls)
                          cudf::table_view{{expected_child1, expected_child2}},
                          true,
                          true,
-                         cudf::make_max_aggregation<reduce_aggregation>());
+                         *cudf::make_max_aggregation<reduce_aggregation>());
   }
 }
 
diff --git a/cpp/tests/reductions/scan_tests.cpp b/cpp/tests/reductions/scan_tests.cpp
index 2053837fdd1..ec5bd182049 100644
--- a/cpp/tests/reductions/scan_tests.cpp
+++ b/cpp/tests/reductions/scan_tests.cpp
@@ -44,7 +44,7 @@ struct ScanTest : public BaseScanTest<T> {
 
   void scan_test(cudf::host_span<HostType const> v,
                  cudf::host_span<bool const> b,
-                 std::unique_ptr<scan_aggregation> const& agg,
+                 scan_aggregation const& agg,
                  scan_type inclusive,
                  null_policy null_handling,
                  numeric::scale_type scale)
@@ -63,7 +63,7 @@ struct ScanTest : public BaseScanTest<T> {
   // Overload to iterate the test over a few different scales for fixed-point tests
   void scan_test(cudf::host_span<HostType const> v,
                  cudf::host_span<bool const> b,
-                 std::unique_ptr<scan_aggregation> const& agg,
+                 scan_aggregation const& agg,
                  scan_type inclusive,
                  null_policy null_handling = null_policy::EXCLUDE)
   {
@@ -76,10 +76,10 @@ struct ScanTest : public BaseScanTest<T> {
     }
   }
 
-  bool params_supported(std::unique_ptr<scan_aggregation> const& agg, scan_type inclusive)
+  bool params_supported(scan_aggregation const& agg, scan_type inclusive)
   {
     bool supported = [&] {
-      switch (agg->kind) {
+      switch (agg.kind) {
         case aggregation::SUM: return std::is_invocable_v<cudf::DeviceSum, T, T>;
         case aggregation::PRODUCT: return std::is_invocable_v<cudf::DeviceProduct, T, T>;
         case aggregation::MIN: return std::is_invocable_v<cudf::DeviceMin, T, T>;
@@ -91,17 +91,16 @@ struct ScanTest : public BaseScanTest<T> {
     }();
 
     // special cases for individual types
-    if constexpr (cudf::is_fixed_point<T>())
-      return supported && (agg->kind != aggregation::PRODUCT);
+    if constexpr (cudf::is_fixed_point<T>()) return supported && (agg.kind != aggregation::PRODUCT);
     if constexpr (std::is_same_v<T, cudf::string_view> || cudf::is_timestamp<T>())
       return supported && (inclusive == scan_type::INCLUSIVE);
     return supported;
   }
 
-  std::function<HostType(HostType, HostType)> make_agg(std::unique_ptr<scan_aggregation> const& agg)
+  std::function<HostType(HostType, HostType)> make_agg(scan_aggregation const& agg)
   {
     if constexpr (std::is_same_v<T, cudf::string_view>) {
-      switch (agg->kind) {
+      switch (agg.kind) {
         case aggregation::MIN: return [](HostType a, HostType b) { return std::min(a, b); };
         case aggregation::MAX: return [](HostType a, HostType b) { return std::max(a, b); };
         default: {
@@ -110,7 +109,7 @@ struct ScanTest : public BaseScanTest<T> {
         }
       }
     } else {
-      switch (agg->kind) {
+      switch (agg.kind) {
         case aggregation::SUM: return std::plus<HostType>{};
         case aggregation::PRODUCT: return std::multiplies<HostType>{};
         case aggregation::MIN: return [](HostType a, HostType b) { return std::min(a, b); };
@@ -123,16 +122,16 @@ struct ScanTest : public BaseScanTest<T> {
     }
   }
 
-  HostType make_identity(std::unique_ptr<scan_aggregation> const& agg)
+  HostType make_identity(scan_aggregation const& agg)
   {
     if constexpr (std::is_same_v<T, cudf::string_view>) {
-      switch (agg->kind) {
+      switch (agg.kind) {
         case aggregation::MIN: return std::string{"\xF7\xBF\xBF\xBF"};
         case aggregation::MAX: return std::string{};
         default: CUDF_FAIL("Unsupported aggregation");
       }
     } else {
-      switch (agg->kind) {
+      switch (agg.kind) {
         case aggregation::SUM: return HostType{0};
         case aggregation::PRODUCT: return HostType{1};
         case aggregation::MIN:
@@ -154,7 +153,7 @@ struct ScanTest : public BaseScanTest<T> {
 
   std::unique_ptr<cudf::column> make_expected(cudf::host_span<HostType const> v,
                                               cudf::host_span<bool const> b,
-                                              std::unique_ptr<scan_aggregation> const& agg,
+                                              scan_aggregation const& agg,
                                               scan_type inclusive,
                                               null_policy null_handling,
                                               numeric::scale_type scale = numeric::scale_type{0})
@@ -220,28 +219,28 @@ TYPED_TEST(ScanTest, Min)
   auto const b = thrust::host_vector<bool>(std::vector<bool>{1, 0, 1, 1, 1, 1, 0, 0, 1});
 
   // no nulls
-  this->scan_test(v, {}, cudf::make_min_aggregation<scan_aggregation>(), scan_type::INCLUSIVE);
-  this->scan_test(v, {}, cudf::make_min_aggregation<scan_aggregation>(), scan_type::EXCLUSIVE);
+  this->scan_test(v, {}, *cudf::make_min_aggregation<scan_aggregation>(), scan_type::INCLUSIVE);
+  this->scan_test(v, {}, *cudf::make_min_aggregation<scan_aggregation>(), scan_type::EXCLUSIVE);
   // skipna = true (default)
   this->scan_test(v,
                   b,
-                  cudf::make_min_aggregation<scan_aggregation>(),
+                  *cudf::make_min_aggregation<scan_aggregation>(),
                   scan_type::INCLUSIVE,
                   null_policy::EXCLUDE);
   this->scan_test(v,
                   b,
-                  cudf::make_min_aggregation<scan_aggregation>(),
+                  *cudf::make_min_aggregation<scan_aggregation>(),
                   scan_type::EXCLUSIVE,
                   null_policy::EXCLUDE);
   // skipna = false
   this->scan_test(v,
                   b,
-                  cudf::make_min_aggregation<scan_aggregation>(),
+                  *cudf::make_min_aggregation<scan_aggregation>(),
                   scan_type::INCLUSIVE,
                   null_policy::INCLUDE);
   this->scan_test(v,
                   b,
-                  cudf::make_min_aggregation<scan_aggregation>(),
+                  *cudf::make_min_aggregation<scan_aggregation>(),
                   scan_type::EXCLUSIVE,
                   null_policy::INCLUDE);
 }
@@ -253,28 +252,28 @@ TYPED_TEST(ScanTest, Max)
 
   // inclusive
   // no nulls
-  this->scan_test(v, {}, cudf::make_max_aggregation<scan_aggregation>(), scan_type::INCLUSIVE);
-  this->scan_test(v, {}, cudf::make_max_aggregation<scan_aggregation>(), scan_type::EXCLUSIVE);
+  this->scan_test(v, {}, *cudf::make_max_aggregation<scan_aggregation>(), scan_type::INCLUSIVE);
+  this->scan_test(v, {}, *cudf::make_max_aggregation<scan_aggregation>(), scan_type::EXCLUSIVE);
   // skipna = true (default)
   this->scan_test(v,
                   b,
-                  cudf::make_max_aggregation<scan_aggregation>(),
+                  *cudf::make_max_aggregation<scan_aggregation>(),
                   scan_type::INCLUSIVE,
                   null_policy::EXCLUDE);
   this->scan_test(v,
                   b,
-                  cudf::make_max_aggregation<scan_aggregation>(),
+                  *cudf::make_max_aggregation<scan_aggregation>(),
                   scan_type::EXCLUSIVE,
                   null_policy::EXCLUDE);
   // skipna = false
   this->scan_test(v,
                   b,
-                  cudf::make_max_aggregation<scan_aggregation>(),
+                  *cudf::make_max_aggregation<scan_aggregation>(),
                   scan_type::INCLUSIVE,
                   null_policy::INCLUDE);
   this->scan_test(v,
                   b,
-                  cudf::make_max_aggregation<scan_aggregation>(),
+                  *cudf::make_max_aggregation<scan_aggregation>(),
                   scan_type::EXCLUSIVE,
                   null_policy::INCLUDE);
 }
@@ -285,28 +284,28 @@ TYPED_TEST(ScanTest, Product)
   auto const b = thrust::host_vector<bool>(std::vector<bool>{1, 1, 1, 0, 1, 1});
 
   // no nulls
-  this->scan_test(v, {}, cudf::make_product_aggregation<scan_aggregation>(), scan_type::INCLUSIVE);
-  this->scan_test(v, {}, cudf::make_product_aggregation<scan_aggregation>(), scan_type::EXCLUSIVE);
+  this->scan_test(v, {}, *cudf::make_product_aggregation<scan_aggregation>(), scan_type::INCLUSIVE);
+  this->scan_test(v, {}, *cudf::make_product_aggregation<scan_aggregation>(), scan_type::EXCLUSIVE);
   // skipna = true (default)
   this->scan_test(v,
                   b,
-                  cudf::make_product_aggregation<scan_aggregation>(),
+                  *cudf::make_product_aggregation<scan_aggregation>(),
                   scan_type::INCLUSIVE,
                   null_policy::EXCLUDE);
   this->scan_test(v,
                   b,
-                  cudf::make_product_aggregation<scan_aggregation>(),
+                  *cudf::make_product_aggregation<scan_aggregation>(),
                   scan_type::EXCLUSIVE,
                   null_policy::EXCLUDE);
   // skipna = false
   this->scan_test(v,
                   b,
-                  cudf::make_product_aggregation<scan_aggregation>(),
+                  *cudf::make_product_aggregation<scan_aggregation>(),
                   scan_type::INCLUSIVE,
                   null_policy::INCLUDE);
   this->scan_test(v,
                   b,
-                  cudf::make_product_aggregation<scan_aggregation>(),
+                  *cudf::make_product_aggregation<scan_aggregation>(),
                   scan_type::EXCLUSIVE,
                   null_policy::INCLUDE);
 }
@@ -321,28 +320,28 @@ TYPED_TEST(ScanTest, Sum)
   auto const b = thrust::host_vector<bool>(std::vector<bool>{1, 0, 1, 1, 0, 0, 1, 1, 1, 1});
 
   // no nulls
-  this->scan_test(v, {}, cudf::make_sum_aggregation<scan_aggregation>(), scan_type::INCLUSIVE);
-  this->scan_test(v, {}, cudf::make_sum_aggregation<scan_aggregation>(), scan_type::EXCLUSIVE);
+  this->scan_test(v, {}, *cudf::make_sum_aggregation<scan_aggregation>(), scan_type::INCLUSIVE);
+  this->scan_test(v, {}, *cudf::make_sum_aggregation<scan_aggregation>(), scan_type::EXCLUSIVE);
   // skipna = true (default)
   this->scan_test(v,
                   b,
-                  cudf::make_sum_aggregation<scan_aggregation>(),
+                  *cudf::make_sum_aggregation<scan_aggregation>(),
                   scan_type::INCLUSIVE,
                   null_policy::EXCLUDE);
   this->scan_test(v,
                   b,
-                  cudf::make_sum_aggregation<scan_aggregation>(),
+                  *cudf::make_sum_aggregation<scan_aggregation>(),
                   scan_type::EXCLUSIVE,
                   null_policy::EXCLUDE);
   // skipna = false
   this->scan_test(v,
                   b,
-                  cudf::make_sum_aggregation<scan_aggregation>(),
+                  *cudf::make_sum_aggregation<scan_aggregation>(),
                   scan_type::INCLUSIVE,
                   null_policy::INCLUDE);
   this->scan_test(v,
                   b,
-                  cudf::make_sum_aggregation<scan_aggregation>(),
+                  *cudf::make_sum_aggregation<scan_aggregation>(),
                   scan_type::EXCLUSIVE,
                   null_policy::INCLUDE);
 }
@@ -355,23 +354,23 @@ TYPED_TEST(ScanTest, EmptyColumn)
   // skipna = true (default)
   this->scan_test(v,
                   b,
-                  cudf::make_min_aggregation<scan_aggregation>(),
+                  *cudf::make_min_aggregation<scan_aggregation>(),
                   scan_type::INCLUSIVE,
                   null_policy::EXCLUDE);
   this->scan_test(v,
                   b,
-                  cudf::make_min_aggregation<scan_aggregation>(),
+                  *cudf::make_min_aggregation<scan_aggregation>(),
                   scan_type::EXCLUSIVE,
                   null_policy::EXCLUDE);
   // skipna = false
   this->scan_test(v,
                   b,
-                  cudf::make_min_aggregation<scan_aggregation>(),
+                  *cudf::make_min_aggregation<scan_aggregation>(),
                   scan_type::INCLUSIVE,
                   null_policy::INCLUDE);
   this->scan_test(v,
                   b,
-                  cudf::make_min_aggregation<scan_aggregation>(),
+                  *cudf::make_min_aggregation<scan_aggregation>(),
                   scan_type::EXCLUSIVE,
                   null_policy::INCLUDE);
 }
@@ -384,23 +383,23 @@ TYPED_TEST(ScanTest, LeadingNulls)
   // skipna = true (default)
   this->scan_test(v,
                   b,
-                  cudf::make_min_aggregation<scan_aggregation>(),
+                  *cudf::make_min_aggregation<scan_aggregation>(),
                   scan_type::INCLUSIVE,
                   null_policy::EXCLUDE);
   this->scan_test(v,
                   b,
-                  cudf::make_min_aggregation<scan_aggregation>(),
+                  *cudf::make_min_aggregation<scan_aggregation>(),
                   scan_type::EXCLUSIVE,
                   null_policy::EXCLUDE);
   // skipna = false
   this->scan_test(v,
                   b,
-                  cudf::make_min_aggregation<scan_aggregation>(),
+                  *cudf::make_min_aggregation<scan_aggregation>(),
                   scan_type::INCLUSIVE,
                   null_policy::INCLUDE);
   this->scan_test(v,
                   b,
-                  cudf::make_min_aggregation<scan_aggregation>(),
+                  *cudf::make_min_aggregation<scan_aggregation>(),
                   scan_type::EXCLUSIVE,
                   null_policy::INCLUDE);
 }
@@ -423,35 +422,35 @@ TEST_F(ScanStringsTest, MoreStringsMinMax)
   thrust::host_vector<std::string> v(data_begin, data_begin + row_count);
   thrust::host_vector<bool> b(validity, validity + row_count);
 
-  this->scan_test(v, {}, cudf::make_min_aggregation<scan_aggregation>(), scan_type::INCLUSIVE);
-  this->scan_test(v, b, cudf::make_min_aggregation<scan_aggregation>(), scan_type::INCLUSIVE);
+  this->scan_test(v, {}, *cudf::make_min_aggregation<scan_aggregation>(), scan_type::INCLUSIVE);
+  this->scan_test(v, b, *cudf::make_min_aggregation<scan_aggregation>(), scan_type::INCLUSIVE);
   this->scan_test(v,
                   b,
-                  cudf::make_min_aggregation<scan_aggregation>(),
+                  *cudf::make_min_aggregation<scan_aggregation>(),
                   scan_type::INCLUSIVE,
                   null_policy::EXCLUDE);
 
-  this->scan_test(v, {}, cudf::make_min_aggregation<scan_aggregation>(), scan_type::EXCLUSIVE);
-  this->scan_test(v, b, cudf::make_min_aggregation<scan_aggregation>(), scan_type::EXCLUSIVE);
+  this->scan_test(v, {}, *cudf::make_min_aggregation<scan_aggregation>(), scan_type::EXCLUSIVE);
+  this->scan_test(v, b, *cudf::make_min_aggregation<scan_aggregation>(), scan_type::EXCLUSIVE);
   this->scan_test(v,
                   b,
-                  cudf::make_min_aggregation<scan_aggregation>(),
+                  *cudf::make_min_aggregation<scan_aggregation>(),
                   scan_type::EXCLUSIVE,
                   null_policy::EXCLUDE);
 
-  this->scan_test(v, {}, cudf::make_max_aggregation<scan_aggregation>(), scan_type::INCLUSIVE);
-  this->scan_test(v, b, cudf::make_max_aggregation<scan_aggregation>(), scan_type::INCLUSIVE);
+  this->scan_test(v, {}, *cudf::make_max_aggregation<scan_aggregation>(), scan_type::INCLUSIVE);
+  this->scan_test(v, b, *cudf::make_max_aggregation<scan_aggregation>(), scan_type::INCLUSIVE);
   this->scan_test(v,
                   b,
-                  cudf::make_max_aggregation<scan_aggregation>(),
+                  *cudf::make_max_aggregation<scan_aggregation>(),
                   scan_type::INCLUSIVE,
                   null_policy::EXCLUDE);
 
-  this->scan_test(v, {}, cudf::make_max_aggregation<scan_aggregation>(), scan_type::EXCLUSIVE);
-  this->scan_test(v, b, cudf::make_max_aggregation<scan_aggregation>(), scan_type::EXCLUSIVE);
+  this->scan_test(v, {}, *cudf::make_max_aggregation<scan_aggregation>(), scan_type::EXCLUSIVE);
+  this->scan_test(v, b, *cudf::make_max_aggregation<scan_aggregation>(), scan_type::EXCLUSIVE);
   this->scan_test(v,
                   b,
-                  cudf::make_max_aggregation<scan_aggregation>(),
+                  *cudf::make_max_aggregation<scan_aggregation>(),
                   scan_type::EXCLUSIVE,
                   null_policy::EXCLUDE);
 }
@@ -470,11 +469,11 @@ TYPED_TEST(ScanChronoTest, ChronoMinMax)
                                                                           {1, 1, 1, 0, 1, 1, 1, 1});
 
   auto result =
-    cudf::scan(col, cudf::make_min_aggregation<scan_aggregation>(), cudf::scan_type::INCLUSIVE);
+    cudf::scan(col, *cudf::make_min_aggregation<scan_aggregation>(), cudf::scan_type::INCLUSIVE);
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected_min);
 
   result = cudf::scan(col,
-                      cudf::make_min_aggregation<scan_aggregation>(),
+                      *cudf::make_min_aggregation<scan_aggregation>(),
                       cudf::scan_type::INCLUSIVE,
                       cudf::null_policy::EXCLUDE);
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected_min);
@@ -482,20 +481,20 @@ TYPED_TEST(ScanChronoTest, ChronoMinMax)
   cudf::test::fixed_width_column_wrapper<TypeParam, int32_t> expected_max({5, 5, 6, 0, 6, 6, 6, 6},
                                                                           {1, 1, 1, 0, 1, 1, 1, 1});
   result =
-    cudf::scan(col, cudf::make_max_aggregation<scan_aggregation>(), cudf::scan_type::INCLUSIVE);
+    cudf::scan(col, *cudf::make_max_aggregation<scan_aggregation>(), cudf::scan_type::INCLUSIVE);
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected_max);
 
   result = cudf::scan(col,
-                      cudf::make_max_aggregation<scan_aggregation>(),
+                      *cudf::make_max_aggregation<scan_aggregation>(),
                       cudf::scan_type::INCLUSIVE,
                       cudf::null_policy::EXCLUDE);
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected_max);
 
   EXPECT_THROW(
-    cudf::scan(col, cudf::make_max_aggregation<scan_aggregation>(), cudf::scan_type::EXCLUSIVE),
+    cudf::scan(col, *cudf::make_max_aggregation<scan_aggregation>(), cudf::scan_type::EXCLUSIVE),
     cudf::logic_error);
   EXPECT_THROW(
-    cudf::scan(col, cudf::make_min_aggregation<scan_aggregation>(), cudf::scan_type::EXCLUSIVE),
+    cudf::scan(col, *cudf::make_min_aggregation<scan_aggregation>(), cudf::scan_type::EXCLUSIVE),
     cudf::logic_error);
 }
 
@@ -513,17 +512,17 @@ TYPED_TEST(ScanDurationTest, Sum)
                                                                       {1, 1, 1, 0, 1, 1, 1, 1});
 
   auto result =
-    cudf::scan(col, cudf::make_sum_aggregation<scan_aggregation>(), cudf::scan_type::INCLUSIVE);
+    cudf::scan(col, *cudf::make_sum_aggregation<scan_aggregation>(), cudf::scan_type::INCLUSIVE);
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected);
 
   result = cudf::scan(col,
-                      cudf::make_sum_aggregation<scan_aggregation>(),
+                      *cudf::make_sum_aggregation<scan_aggregation>(),
                       cudf::scan_type::INCLUSIVE,
                       cudf::null_policy::EXCLUDE);
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected);
 
   EXPECT_THROW(
-    cudf::scan(col, cudf::make_sum_aggregation<scan_aggregation>(), cudf::scan_type::EXCLUSIVE),
+    cudf::scan(col, *cudf::make_sum_aggregation<scan_aggregation>(), cudf::scan_type::EXCLUSIVE),
     cudf::logic_error);
 }
 
@@ -548,8 +547,8 @@ TEST_F(StructScanTest, StructScanMinMaxNoNull)
       auto child2 = INTS_CW{1, 1, 1, 4, 4, 4, 4, 8, 8, 8};
       return STRUCTS_CW{{child1, child2}};
     }();
-    auto const result =
-      cudf::scan(input, cudf::make_min_aggregation<scan_aggregation>(), cudf::scan_type::INCLUSIVE);
+    auto const result = cudf::scan(
+      input, *cudf::make_min_aggregation<scan_aggregation>(), cudf::scan_type::INCLUSIVE);
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
   }
 
@@ -559,8 +558,8 @@ TEST_F(StructScanTest, StructScanMinMaxNoNull)
       auto child2 = INTS_CW{1, 2, 3, 3, 3, 3, 3, 3, 3, 3};
       return STRUCTS_CW{{child1, child2}};
     }();
-    auto const result =
-      cudf::scan(input, cudf::make_max_aggregation<scan_aggregation>(), cudf::scan_type::INCLUSIVE);
+    auto const result = cudf::scan(
+      input, *cudf::make_max_aggregation<scan_aggregation>(), cudf::scan_type::INCLUSIVE);
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
   }
 }
@@ -598,8 +597,8 @@ TEST_F(StructScanTest, StructScanMinMaxSlicedInput)
       auto child2 = INTS_CW{1, 1, 1, 4, 4, 4, 4, 8, 8, 8};
       return STRUCTS_CW{{child1, child2}};
     }();
-    auto const result =
-      cudf::scan(input, cudf::make_min_aggregation<scan_aggregation>(), cudf::scan_type::INCLUSIVE);
+    auto const result = cudf::scan(
+      input, *cudf::make_min_aggregation<scan_aggregation>(), cudf::scan_type::INCLUSIVE);
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
   }
 
@@ -609,8 +608,8 @@ TEST_F(StructScanTest, StructScanMinMaxSlicedInput)
       auto child2 = INTS_CW{1, 2, 3, 3, 3, 3, 3, 3, 3, 3};
       return STRUCTS_CW{{child1, child2}};
     }();
-    auto const result =
-      cudf::scan(input, cudf::make_max_aggregation<scan_aggregation>(), cudf::scan_type::INCLUSIVE);
+    auto const result = cudf::scan(
+      input, *cudf::make_max_aggregation<scan_aggregation>(), cudf::scan_type::INCLUSIVE);
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
   }
 }
@@ -670,7 +669,7 @@ TEST_F(StructScanTest, StructScanMinMaxWithNulls)
     }();
 
     auto const result = cudf::scan(input,
-                                   cudf::make_min_aggregation<scan_aggregation>(),
+                                   *cudf::make_min_aggregation<scan_aggregation>(),
                                    cudf::scan_type::INCLUSIVE,
                                    null_policy::EXCLUDE);
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
@@ -685,7 +684,7 @@ TEST_F(StructScanTest, StructScanMinMaxWithNulls)
     }();
 
     auto const result = cudf::scan(input,
-                                   cudf::make_max_aggregation<scan_aggregation>(),
+                                   *cudf::make_max_aggregation<scan_aggregation>(),
                                    cudf::scan_type::INCLUSIVE,
                                    null_policy::EXCLUDE);
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
@@ -719,7 +718,7 @@ TEST_F(StructScanTest, StructScanMinMaxWithNulls)
     }();
 
     auto const result = cudf::scan(input,
-                                   cudf::make_min_aggregation<scan_aggregation>(),
+                                   *cudf::make_min_aggregation<scan_aggregation>(),
                                    cudf::scan_type::INCLUSIVE,
                                    null_policy::INCLUDE);
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
@@ -751,7 +750,7 @@ TEST_F(StructScanTest, StructScanMinMaxWithNulls)
     }();
 
     auto const result = cudf::scan(input,
-                                   cudf::make_max_aggregation<scan_aggregation>(),
+                                   *cudf::make_max_aggregation<scan_aggregation>(),
                                    cudf::scan_type::INCLUSIVE,
                                    null_policy::INCLUDE);
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, result->view());
diff --git a/cpp/tests/reductions/tdigest_tests.cu b/cpp/tests/reductions/tdigest_tests.cu
index 4a8f716c160..6e348b04f1c 100644
--- a/cpp/tests/reductions/tdigest_tests.cu
+++ b/cpp/tests/reductions/tdigest_tests.cu
@@ -35,7 +35,7 @@ struct reduce_op {
     // result is a scalar, but we want to extract out the underlying column
     auto scalar_result =
       cudf::reduce(values,
-                   cudf::make_tdigest_aggregation<cudf::reduce_aggregation>(delta),
+                   *cudf::make_tdigest_aggregation<cudf::reduce_aggregation>(delta),
                    cudf::data_type{cudf::type_id::STRUCT});
     auto tbl = static_cast<cudf::struct_scalar const*>(scalar_result.get())->view();
     std::vector<std::unique_ptr<cudf::column>> cols;
@@ -53,7 +53,7 @@ struct reduce_merge_op {
     // result is a scalar, but we want to extract out the underlying column
     auto scalar_result =
       cudf::reduce(values,
-                   cudf::make_merge_tdigest_aggregation<cudf::reduce_aggregation>(delta),
+                   *cudf::make_merge_tdigest_aggregation<cudf::reduce_aggregation>(delta),
                    cudf::data_type{cudf::type_id::STRUCT});
     auto tbl = static_cast<cudf::struct_scalar const*>(scalar_result.get())->view();
     std::vector<std::unique_ptr<cudf::column>> cols;
@@ -133,7 +133,7 @@ TEST_F(ReductionTDigestMerge, FewHeavyCentroids)
   // merge
   auto scalar_result =
     cudf::reduce(*values,
-                 cudf::make_merge_tdigest_aggregation<cudf::reduce_aggregation>(1000),
+                 *cudf::make_merge_tdigest_aggregation<cudf::reduce_aggregation>(1000),
                  cudf::data_type{cudf::type_id::STRUCT});
 
   // convert to a table
diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp
index 979c1f9f772..716a9f0b834 100644
--- a/java/src/main/native/src/ColumnViewJni.cpp
+++ b/java/src/main/native/src/ColumnViewJni.cpp
@@ -263,10 +263,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_reduce(JNIEnv *env, jclas
     auto agg = reinterpret_cast<cudf::aggregation *>(j_agg);
     cudf::data_type out_dtype = cudf::jni::make_data_type(j_dtype, scale);
     return release_as_jlong(
-        cudf::reduce(*col,
-                     std::unique_ptr<cudf::reduce_aggregation>(
-                         dynamic_cast<cudf::reduce_aggregation *>(agg->clone().release())),
-                     out_dtype));
+        cudf::reduce(*col, *dynamic_cast<cudf::reduce_aggregation *>(agg), out_dtype));
   }
   CATCH_STD(env, 0);
 }
@@ -321,10 +318,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_scan(JNIEnv *env, jclass,
     auto scan_type = is_inclusive ? cudf::scan_type::INCLUSIVE : cudf::scan_type::EXCLUSIVE;
     auto null_policy = include_nulls ? cudf::null_policy::INCLUDE : cudf::null_policy::EXCLUDE;
     return release_as_jlong(
-        cudf::scan(*col,
-                   std::unique_ptr<cudf::scan_aggregation>(
-                       dynamic_cast<cudf::scan_aggregation *>(agg->clone().release())),
-                   scan_type, null_policy));
+        cudf::scan(*col, *dynamic_cast<cudf::scan_aggregation *>(agg), scan_type, null_policy));
   }
   CATCH_STD(env, 0);
 }
diff --git a/python/cudf/cudf/_lib/cpp/reduce.pxd b/python/cudf/cudf/_lib/cpp/reduce.pxd
index 22ae09346ed..7952c717916 100644
--- a/python/cudf/cudf/_lib/cpp/reduce.pxd
+++ b/python/cudf/cudf/_lib/cpp/reduce.pxd
@@ -14,7 +14,7 @@ from cudf._lib.scalar cimport DeviceScalar
 cdef extern from "cudf/reduction.hpp" namespace "cudf" nogil:
     cdef unique_ptr[scalar] cpp_reduce "cudf::reduce" (
         column_view col,
-        const unique_ptr[reduce_aggregation] agg,
+        const reduce_aggregation& agg,
         data_type type
     ) except +
 
@@ -24,7 +24,7 @@ cdef extern from "cudf/reduction.hpp" namespace "cudf" nogil:
 
     cdef unique_ptr[column] cpp_scan "cudf::scan" (
         column_view col,
-        const unique_ptr[scan_aggregation] agg,
+        const scan_aggregation& agg,
         scan_type inclusive
     ) except +
 
diff --git a/python/cudf/cudf/_lib/reduce.pyx b/python/cudf/cudf/_lib/reduce.pyx
index c1494df9cac..ec338eeee70 100644
--- a/python/cudf/cudf/_lib/reduce.pyx
+++ b/python/cudf/cudf/_lib/reduce.pyx
@@ -1,5 +1,7 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
+from cython.operator import dereference
+
 import cudf
 from cudf.api.types import is_decimal_dtype
 
@@ -74,7 +76,7 @@ def reduce(reduction_op, Column incol, dtype=None, **kwargs):
     with nogil:
         c_result = move(cpp_reduce(
             c_incol_view,
-            cython_agg.c_obj,
+            dereference(cython_agg.c_obj),
             c_out_dtype
         ))
 
@@ -112,7 +114,7 @@ def scan(scan_op, Column incol, inclusive, **kwargs):
     with nogil:
         c_result = move(cpp_scan(
             c_incol_view,
-            cython_agg.c_obj,
+            dereference(cython_agg.c_obj),
             c_inclusive
         ))
 

From 7940b5bb985e0db805f7effb0eb3de53e7f57b88 Mon Sep 17 00:00:00 2001
From: Vukasin Milovanovic <vmilovanovic@nvidia.com>
Date: Fri, 21 Oct 2022 11:00:32 -0700
Subject: [PATCH 059/202] Fix maximum page size estimate in Parquet writer
 (#11962)

Closes https://github.com/rapidsai/cudf/issues/11916

CUDA memcheck reports an out-of-bounds (OOB) write in one of the tests. The root cause is an under-allocated buffer for encoded pages.
This PR fixes the computation of the maximum size of data pages (RLE encoded) when dictionary encoding is used.
Other changes:
Refactored the max RLE page size computation to avoid code repetition.
Used the actual dictionary index width instead of the (outdated) worst case.

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/11962
---
 cpp/src/io/parquet/page_enc.cu | 38 ++++++++++++++++------------------
 1 file changed, 18 insertions(+), 20 deletions(-)

diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu
index 7c5651b1ef8..15bd4fe17e3 100644
--- a/cpp/src/io/parquet/page_enc.cu
+++ b/cpp/src/io/parquet/page_enc.cu
@@ -228,6 +228,14 @@ __global__ void __launch_bounds__(128)
   }
 }
 
+constexpr uint32_t max_RLE_page_size(uint8_t value_bit_width, uint32_t num_values)
+{
+  if (value_bit_width == 0) return 0;
+
+  // Run length = 4, max(rle/bitpack header) = 5, add one byte per 256 values for overhead
+  return 4 + 5 + util::div_rounding_up_unsafe(num_values * value_bit_width, 8) + (num_values / 256);
+}
+
 // blockDim {128,1,1}
 __global__ void __launch_bounds__(128)
   gpuInitPages(device_2dspan<EncColumnChunk> chunks,
@@ -340,7 +348,7 @@ __global__ void __launch_bounds__(128)
       __syncwarp();
       uint32_t fragment_data_size =
         (ck_g.use_dictionary)
-          ? frag_g.num_leaf_values * 2  // Assume worst-case of 2-bytes per dictionary index
+          ? frag_g.num_leaf_values * util::div_rounding_up_unsafe(ck_g.dict_rle_bits, 8)
           : frag_g.fragment_data_size;
       // TODO (dm): this convoluted logic to limit page size needs refactoring
       size_t this_max_page_size = (values_in_page * 2 >= ck_g.num_values)   ? 256 * 1024
@@ -354,8 +362,8 @@ __global__ void __launch_bounds__(128)
           (values_in_page > 0 && (page_size + fragment_data_size > this_max_page_size)) ||
           rows_in_page >= max_page_size_rows) {
         if (ck_g.use_dictionary) {
-          page_size =
-            1 + 5 + ((values_in_page * ck_g.dict_rle_bits + 7) >> 3) + (values_in_page >> 8);
+          // Additional byte to store entry bit width
+          page_size = 1 + max_RLE_page_size(ck_g.dict_rle_bits, values_in_page);
         }
         if (!t) {
           page_g.num_fragments = fragments_in_chunk - page_start;
@@ -378,23 +386,13 @@ __global__ void __launch_bounds__(128)
           if (not comp_page_sizes.empty()) {
             page_g.compressed_data = ck_g.compressed_bfr + comp_page_offset;
           }
-          page_g.start_row        = cur_row;
-          page_g.num_rows         = rows_in_page;
-          page_g.num_leaf_values  = leaf_values_in_page;
-          page_g.num_values       = values_in_page;
-          uint32_t def_level_bits = col_g.num_def_level_bits();
-          uint32_t rep_level_bits = col_g.num_rep_level_bits();
-          // Run length = 4, max(rle/bitpack header) = 5, add one byte per 256 values for overhead
-          // TODO (dm): Improve readability of these calculations.
-          uint32_t def_level_size =
-            (def_level_bits != 0)
-              ? 4 + 5 + ((def_level_bits * page_g.num_values + 7) >> 3) + (page_g.num_values >> 8)
-              : 0;
-          uint32_t rep_level_size =
-            (rep_level_bits != 0)
-              ? 4 + 5 + ((rep_level_bits * page_g.num_values + 7) >> 3) + (page_g.num_values >> 8)
-              : 0;
-          page_g.max_data_size = page_size + def_level_size + rep_level_size;
+          page_g.start_row          = cur_row;
+          page_g.num_rows           = rows_in_page;
+          page_g.num_leaf_values    = leaf_values_in_page;
+          page_g.num_values         = values_in_page;
+          auto const def_level_size = max_RLE_page_size(col_g.num_def_level_bits(), values_in_page);
+          auto const rep_level_size = max_RLE_page_size(col_g.num_rep_level_bits(), values_in_page);
+          page_g.max_data_size      = page_size + def_level_size + rep_level_size;
 
           pagestats_g.start_chunk = ck_g.first_fragment + page_start;
           pagestats_g.num_chunks  = page_g.num_fragments;

From f1ab5e979fc5495cebafd9858700aadde6be96f4 Mon Sep 17 00:00:00 2001
From: Ed Seidl <etseidl@users.noreply.github.com>
Date: Fri, 21 Oct 2022 12:39:36 -0700
Subject: [PATCH 060/202] add V2 page header support to parquet reader (#11778)

Adds support for reading Parquet files with V2 page headers. Fixes #11686.

~~Submitting as draft for now because I'm not sure how to do unit tests for this.  libcudf cannot produce files with V2 headers, so I would need to either add files to a data directory somewhere, or add raw binary of some parquet files to parquet_test.cpp. Given the comment on the `DecimalRead` test, neither seems attractive. Suggestions are welcome.  Perhaps use python to test?~~

Authors:
  - Ed Seidl (https://github.com/etseidl)

Approvers:
  - Vukasin Milovanovic (https://github.com/vuule)
  - Mike Wilson (https://github.com/hyperbolic2346)
  - Matthew Roeschke (https://github.com/mroeschke)

URL: https://github.com/rapidsai/cudf/pull/11778
---
 cpp/src/io/parquet/page_data.cu        | 18 ++++---
 cpp/src/io/parquet/page_hdr.cu         | 17 +++++-
 cpp/src/io/parquet/parquet_gpu.hpp     |  5 ++
 cpp/src/io/parquet/reader_impl.cu      | 42 +++++++++++----
 python/cudf/cudf/tests/test_parquet.py | 74 ++++++++++++++++++++++++++
 5 files changed, 138 insertions(+), 18 deletions(-)

diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu
index 57d55be6145..6c314261a13 100644
--- a/cpp/src/io/parquet/page_data.cu
+++ b/cpp/src/io/parquet/page_data.cu
@@ -146,11 +146,18 @@ __device__ uint32_t InitLevelSection(page_state_s* s,
     s->initial_rle_value[lvl] = 0;
     s->lvl_start[lvl]         = cur;
   } else if (encoding == Encoding::RLE) {
-    if (cur + 4 < end) {
-      uint32_t run;
+    // V2 only uses RLE encoding, so only perform check here
+    if (s->page.def_lvl_bytes || s->page.rep_lvl_bytes) {
+      len = lvl == level_type::DEFINITION ? s->page.def_lvl_bytes : s->page.rep_lvl_bytes;
+    } else if (cur + 4 < end) {
       len = 4 + (cur[0]) + (cur[1] << 8) + (cur[2] << 16) + (cur[3] << 24);
       cur += 4;
-      run                     = get_vlq32(cur, end);
+    } else {
+      len      = 0;
+      s->error = 2;
+    }
+    if (!s->error) {
+      uint32_t run            = get_vlq32(cur, end);
       s->initial_rle_run[lvl] = run;
       if (!(run & 1)) {
         int v = (cur < end) ? cur[0] : 0;
@@ -163,9 +170,6 @@ __device__ uint32_t InitLevelSection(page_state_s* s,
       }
       s->lvl_start[lvl] = cur;
       if (cur > end) { s->error = 2; }
-    } else {
-      len      = 0;
-      s->error = 2;
     }
   } else if (encoding == Encoding::BIT_PACKED) {
     len                       = (s->page.num_input_values * level_bits + 7) >> 3;
@@ -176,7 +180,7 @@ __device__ uint32_t InitLevelSection(page_state_s* s,
     s->error = 3;
     len      = 0;
   }
-  return (uint32_t)len;
+  return static_cast<uint32_t>(len);
 }
 
 /**
diff --git a/cpp/src/io/parquet/page_hdr.cu b/cpp/src/io/parquet/page_hdr.cu
index e7856a871c1..19922bf7022 100644
--- a/cpp/src/io/parquet/page_hdr.cu
+++ b/cpp/src/io/parquet/page_hdr.cu
@@ -307,10 +307,11 @@ struct gpuParseDataPageHeaderV2 {
   __device__ bool operator()(byte_stream_s* bs)
   {
     auto op = thrust::make_tuple(ParquetFieldInt32(1, bs->page.num_input_values),
+                                 ParquetFieldInt32(2, bs->page.num_nulls),
                                  ParquetFieldInt32(3, bs->page.num_rows),
                                  ParquetFieldEnum<Encoding>(4, bs->page.encoding),
-                                 ParquetFieldEnum<Encoding>(5, bs->page.definition_level_encoding),
-                                 ParquetFieldEnum<Encoding>(6, bs->page.repetition_level_encoding));
+                                 ParquetFieldInt32(5, bs->page.def_lvl_bytes),
+                                 ParquetFieldInt32(6, bs->page.rep_lvl_bytes));
     return parse_header(op, bs);
   }
 };
@@ -382,18 +383,30 @@ __global__ void __launch_bounds__(128)
         // definition levels
         bs->page.chunk_row += bs->page.num_rows;
         bs->page.num_rows = 0;
+        // zero out V2 info
+        bs->page.num_nulls     = 0;
+        bs->page.def_lvl_bytes = 0;
+        bs->page.rep_lvl_bytes = 0;
         if (parse_page_header(bs) && bs->page.compressed_page_size >= 0) {
           switch (bs->page_type) {
             case PageType::DATA_PAGE:
+              index_out = num_dict_pages + data_page_count;
+              data_page_count++;
+              bs->page.flags = 0;
               // this computation is only valid for flat schemas. for nested schemas,
               // they will be recomputed in the preprocess step by examining repetition and
               // definition levels
               bs->page.num_rows = bs->page.num_input_values;
+              values_found += bs->page.num_input_values;
+              break;
             case PageType::DATA_PAGE_V2:
               index_out = num_dict_pages + data_page_count;
               data_page_count++;
               bs->page.flags = 0;
               values_found += bs->page.num_input_values;
+              // V2 only uses RLE, so it was removed from the header
+              bs->page.definition_level_encoding = Encoding::RLE;
+              bs->page.repetition_level_encoding = Encoding::RLE;
               break;
             case PageType::DICTIONARY_PAGE:
               index_out = dictionary_page_count;
diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp
index 38a0d70b0f8..c31a1531fa7 100644
--- a/cpp/src/io/parquet/parquet_gpu.hpp
+++ b/cpp/src/io/parquet/parquet_gpu.hpp
@@ -128,6 +128,10 @@ struct PageInfo {
                        // decompression
   int32_t compressed_page_size;    // compressed data size in bytes
   int32_t uncompressed_page_size;  // uncompressed data size in bytes
+  // for V2 pages, the def and rep level data is not compressed, and lacks the 4-byte length
+  // indicator. instead the lengths for these are stored in the header.
+  int32_t def_lvl_bytes;  // length of the definition levels (V2 header)
+  int32_t rep_lvl_bytes;  // length of the repetition levels (V2 header)
   // Number of values in this data page or dictionary.
   // Important : the # of input values does not necessarily
   // correspond to the number of rows in the output. It just reflects the number
@@ -138,6 +142,7 @@ struct PageInfo {
   int32_t num_input_values;
   int32_t chunk_row;       // starting row of this page relative to the start of the chunk
   int32_t num_rows;        // number of rows in this page
+  int32_t num_nulls;       // number of null values (V2 header)
   int32_t chunk_idx;       // column chunk this page belongs to
   int32_t src_col_schema;  // schema index of this column
   uint8_t flags;           // PAGEINFO_FLAGS_XXX
diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu
index 0997d2a968d..535641654a2 100644
--- a/cpp/src/io/parquet/reader_impl.cu
+++ b/cpp/src/io/parquet/reader_impl.cu
@@ -1151,6 +1151,12 @@ rmm::device_buffer reader::impl::decompress_page_data(
   std::vector<device_span<uint8_t>> comp_out;
   comp_out.reserve(num_comp_pages);
 
+  // vectors to save v2 def and rep level data, if any
+  std::vector<device_span<uint8_t const>> copy_in;
+  copy_in.reserve(num_comp_pages);
+  std::vector<device_span<uint8_t>> copy_out;
+  copy_out.reserve(num_comp_pages);
+
   rmm::device_uvector<compression_result> comp_res(num_comp_pages, _stream);
   thrust::fill(rmm::exec_policy(_stream),
                comp_res.begin(),
@@ -1162,15 +1168,24 @@ rmm::device_buffer reader::impl::decompress_page_data(
   for (const auto& codec : codecs) {
     if (codec.num_pages == 0) { continue; }
 
-    for_each_codec_page(codec.compression_type, [&](size_t page) {
-      auto dst_base = static_cast<uint8_t*>(decomp_pages.data());
-      comp_in.emplace_back(pages[page].page_data,
-                           static_cast<size_t>(pages[page].compressed_page_size));
-      comp_out.emplace_back(dst_base + decomp_offset,
-                            static_cast<size_t>(pages[page].uncompressed_page_size));
-
-      pages[page].page_data = static_cast<uint8_t*>(comp_out.back().data());
-      decomp_offset += comp_out.back().size();
+    for_each_codec_page(codec.compression_type, [&](size_t page_idx) {
+      auto const dst_base = static_cast<uint8_t*>(decomp_pages.data()) + decomp_offset;
+      auto& page          = pages[page_idx];
+      // offset will only be non-zero for V2 pages
+      auto const offset = page.def_lvl_bytes + page.rep_lvl_bytes;
+      // for V2 need to copy def and rep level info into place, and then offset the
+      // input and output buffers. otherwise we'd have to keep both the compressed
+      // and decompressed data.
+      if (offset != 0) {
+        copy_in.emplace_back(page.page_data, offset);
+        copy_out.emplace_back(dst_base, offset);
+      }
+      comp_in.emplace_back(page.page_data + offset,
+                           static_cast<size_t>(page.compressed_page_size - offset));
+      comp_out.emplace_back(dst_base + offset,
+                            static_cast<size_t>(page.uncompressed_page_size - offset));
+      page.page_data = dst_base;
+      decomp_offset += page.uncompressed_page_size;
     });
 
     host_span<device_span<uint8_t const> const> comp_in_view{comp_in.data() + start_pos,
@@ -1222,6 +1237,15 @@ rmm::device_buffer reader::impl::decompress_page_data(
 
   decompress_check(comp_res, _stream);
 
+  // now copy the uncompressed V2 def and rep level data
+  if (not copy_in.empty()) {
+    auto const d_copy_in  = cudf::detail::make_device_uvector_async(copy_in, _stream);
+    auto const d_copy_out = cudf::detail::make_device_uvector_async(copy_out, _stream);
+
+    gpu_copy_uncompressed_blocks(d_copy_in, d_copy_out, _stream);
+    _stream.synchronize();
+  }
+
   // Update the page information in device memory with the updated value of
   // page_data; it now points to the uncompressed data buffer
   pages.host_to_device(_stream);
diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py
index 022f7cdd6f7..2ac1dfda344 100644
--- a/python/cudf/cudf/tests/test_parquet.py
+++ b/python/cudf/cudf/tests/test_parquet.py
@@ -1296,6 +1296,80 @@ def string_list_gen_wrapped(x, y):
     assert expect.equals(got.to_arrow())
 
 
+def test_parquet_reader_v2(tmpdir, simple_pdf):
+    pdf_fname = tmpdir.join("pdfv2.parquet")
+    simple_pdf.to_parquet(pdf_fname, data_page_version="2.0")
+    assert_eq(cudf.read_parquet(pdf_fname), simple_pdf)
+
+
+@pytest.mark.parametrize(
+    "data",
+    [
+        # Structs
+        {
+            "being": [
+                None,
+                {"human?": True, "Deets": {"Name": "Carrot", "Age": 27}},
+                {"human?": None, "Deets": {"Name": "Angua", "Age": 25}},
+                {"human?": False, "Deets": {"Name": "Cheery", "Age": 31}},
+                {"human?": False, "Deets": None},
+                {"human?": None, "Deets": {"Name": "Mr", "Age": None}},
+            ]
+        },
+        # List of Structs
+        {
+            "family": [
+                [None, {"human?": True, "deets": {"weight": 2.4, "age": 27}}],
+                [
+                    {"human?": None, "deets": {"weight": 5.3, "age": 25}},
+                    {"human?": False, "deets": {"weight": 8.0, "age": 31}},
+                    {"human?": False, "deets": None},
+                ],
+                [],
+                [{"human?": None, "deets": {"weight": 6.9, "age": None}}],
+            ]
+        },
+        # Struct of Lists
+        {
+            "Real estate records": [
+                None,
+                {
+                    "Status": "NRI",
+                    "Ownerships": {
+                        "land_unit": [None, 2, None],
+                        "flats": [[1, 2, 3], [], [4, 5], [], [0, 6, 0]],
+                    },
+                },
+                {
+                    "Status": None,
+                    "Ownerships": {
+                        "land_unit": [4, 5],
+                        "flats": [[7, 8], []],
+                    },
+                },
+                {
+                    "Status": "RI",
+                    "Ownerships": {"land_unit": None, "flats": [[]]},
+                },
+                {"Status": "RI", "Ownerships": None},
+                {
+                    "Status": None,
+                    "Ownerships": {
+                        "land_unit": [7, 8, 9],
+                        "flats": [[], [], []],
+                    },
+                },
+            ]
+        },
+    ],
+)
+def test_parquet_reader_nested_v2(tmpdir, data):
+    expect = pd.DataFrame(data)
+    pdf_fname = tmpdir.join("pdfv2.parquet")
+    expect.to_parquet(pdf_fname, data_page_version="2.0")
+    assert_eq(cudf.read_parquet(pdf_fname), expect)
+
+
 @pytest.mark.filterwarnings("ignore:Using CPU")
 def test_parquet_writer_cpu_pyarrow(
     tmpdir, pdf_day_timestamps, gdf_day_timestamps

From 5c2150e0d942fa525205451cd954e48ff35b8a84 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Fri, 21 Oct 2022 15:10:17 -0500
Subject: [PATCH 061/202] Default to equal NaNs in make_merge_sets_aggregation.
 (#11952)

Partially resolves https://github.com/rapidsai/cudf/issues/11329. This helps to align our default behaviors for null and NaN equality across APIs, specifically for `make_merge_sets_aggregation` in this PR. All functions should default to treating null values as equal to one another and NaN values as equal to one another.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Vyas Ramasubramani (https://github.com/vyasr)
  - David Wendt (https://github.com/davidwendt)

URL: https://github.com/rapidsai/cudf/pull/11952
---
 cpp/include/cudf/aggregation.hpp           | 5 +++--
 cpp/tests/reductions/collect_ops_tests.cpp | 9 ++++++---
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp
index a92da0b0347..d319041f8b1 100644
--- a/cpp/include/cudf/aggregation.hpp
+++ b/cpp/include/cudf/aggregation.hpp
@@ -589,8 +589,9 @@ std::unique_ptr<Base> make_merge_lists_aggregation();
  * @return A MERGE_SETS aggregation object
  */
 template <typename Base = aggregation>
-std::unique_ptr<Base> make_merge_sets_aggregation(null_equality nulls_equal = null_equality::EQUAL,
-                                                  nan_equality nans_equal = nan_equality::UNEQUAL);
+std::unique_ptr<Base> make_merge_sets_aggregation(
+  null_equality nulls_equal = null_equality::EQUAL,
+  nan_equality nans_equal   = nan_equality::ALL_EQUAL);
 
 /**
  * @brief Factory to create a MERGE_M2 aggregation
diff --git a/cpp/tests/reductions/collect_ops_tests.cpp b/cpp/tests/reductions/collect_ops_tests.cpp
index 2bb13fd671f..90014c3b10f 100644
--- a/cpp/tests/reductions/collect_ops_tests.cpp
+++ b/cpp/tests/reductions/collect_ops_tests.cpp
@@ -243,14 +243,17 @@ TEST_F(CollectTest, MergeSetsWithNaN)
 
   // nan unequal with null equal
   fp_wrapper expected1{{-2.3e-5f, 1.0f, 2.3e5f, -NAN, NAN, NAN, 0.0f}, {1, 1, 1, 1, 1, 1, 0}};
-  auto const ret1 = collect_set(col, make_merge_sets_aggregation<reduce_aggregation>());
+  auto const ret1 = collect_set(
+    col,
+    make_merge_sets_aggregation<reduce_aggregation>(null_equality::EQUAL, nan_equality::UNEQUAL));
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected1, dynamic_cast<list_scalar*>(ret1.get())->view());
 
   // nan unequal with null unequal
   fp_wrapper expected2{{-2.3e-5f, 1.0f, 2.3e5f, -NAN, NAN, NAN, 0.0f, 0.0f, 0.0f},
                        {1, 1, 1, 1, 1, 1, 0, 0, 0}};
-  auto const ret2 =
-    collect_set(col, make_merge_sets_aggregation<reduce_aggregation>(null_equality::UNEQUAL));
+  auto const ret2 = collect_set(
+    col,
+    make_merge_sets_aggregation<reduce_aggregation>(null_equality::UNEQUAL, nan_equality::UNEQUAL));
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected2, dynamic_cast<list_scalar*>(ret2.get())->view());
 
   // nan equal with null equal

From 5a190b966b6ad3d745915e358b15dafdb24a4185 Mon Sep 17 00:00:00 2001
From: Robert Maynard <rmaynard@nvidia.com>
Date: Mon, 24 Oct 2022 12:26:15 -0400
Subject: [PATCH 062/202] Switch over to rapids-cmake patches for thrust
 (#11921)

Now that rapids-cmake supports custom patches, we can move cudf over to rapids-cmake for Thrust. This removes the need for custom install rules in cudf for Thrust, as rapids-cmake does that for us.

This also separates out all Thrust patches so that we can better track upstream approval and remove as needed.

Authors:
  - Robert Maynard (https://github.com/robertmaynard)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - David Wendt (https://github.com/davidwendt)

URL: https://github.com/rapidsai/cudf/pull/11921
---
 .gitignore                                    |   1 -
 cpp/cmake/thirdparty/get_thrust.cmake         |  77 +++-------
 .../thrust_disable_64bit_dispatching.diff     |  29 ++++
 .../thrust_faster_scan_compile_times.diff     |  39 +++++
 .../thrust_faster_sort_compile_times.diff     |  49 ++++++
 .../thirdparty/patches/thrust_override.json   |  34 +++++
 ...ust_transform_iter_with_reduce_by_key.diff |  26 ++++
 cpp/cmake/thrust.patch                        | 142 ------------------
 8 files changed, 196 insertions(+), 201 deletions(-)
 create mode 100644 cpp/cmake/thirdparty/patches/thrust_disable_64bit_dispatching.diff
 create mode 100644 cpp/cmake/thirdparty/patches/thrust_faster_scan_compile_times.diff
 create mode 100644 cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times.diff
 create mode 100644 cpp/cmake/thirdparty/patches/thrust_override.json
 create mode 100644 cpp/cmake/thirdparty/patches/thrust_transform_iter_with_reduce_by_key.diff
 delete mode 100644 cpp/cmake/thrust.patch

diff --git a/.gitignore b/.gitignore
index 0d63c76bf9f..aaac92ff643 100644
--- a/.gitignore
+++ b/.gitignore
@@ -70,7 +70,6 @@ junit-cudf.xml
 test-results
 
 ## Patching
-*.diff
 *.orig
 *.rej
 
diff --git a/cpp/cmake/thirdparty/get_thrust.cmake b/cpp/cmake/thirdparty/get_thrust.cmake
index 379b1521bf0..25a4c9dd3ba 100644
--- a/cpp/cmake/thirdparty/get_thrust.cmake
+++ b/cpp/cmake/thirdparty/get_thrust.cmake
@@ -13,73 +13,34 @@
 # =============================================================================
 
 # This function finds thrust and sets any additional necessary environment variables.
-function(find_and_configure_thrust VERSION)
-  # We only want to set `UPDATE_DISCONNECTED` while the GIT tag hasn't moved from the last time we
-  # cloned
-  set(cpm_thrust_disconnect_update "UPDATE_DISCONNECTED TRUE")
-  set(CPM_THRUST_CURRENT_VERSION
-      ${VERSION}
-      CACHE STRING "version of thrust we checked out"
-  )
-  if(NOT VERSION VERSION_EQUAL CPM_THRUST_CURRENT_VERSION)
-    set(CPM_THRUST_CURRENT_VERSION
-        ${VERSION}
-        CACHE STRING "version of thrust we checked out" FORCE
-    )
-    set(cpm_thrust_disconnect_update "")
-  endif()
+function(find_and_configure_thrust)
 
-  # We currently require cuDF to always build with a custom version of thrust. This is needed so
-  # that build times of of cudf are kept reasonable, without this CI builds of cudf will be killed
-  # as some source file can take over 45 minutes to build
-  #
-  set(CPM_DOWNLOAD_ALL TRUE)
-  rapids_cpm_find(
-    Thrust ${VERSION}
-    BUILD_EXPORT_SET cudf-exports
-    INSTALL_EXPORT_SET cudf-exports
-    CPM_ARGS
-    GIT_REPOSITORY https://github.com/NVIDIA/thrust.git
-    GIT_TAG ${VERSION}
-    GIT_SHALLOW TRUE ${cpm_thrust_disconnect_update}
-    PATCH_COMMAND patch --reject-file=- -p1 -N < ${CUDF_SOURCE_DIR}/cmake/thrust.patch || true
-    OPTIONS "THRUST_INSTALL TRUE"
-  )
+  include(${rapids-cmake-dir}/cpm/thrust.cmake)
+  include(${rapids-cmake-dir}/cpm/package_override.cmake)
 
-  if(NOT TARGET cudf::Thrust)
-    thrust_create_target(cudf::Thrust FROM_OPTIONS)
-  endif()
+  set(cudf_patch_dir "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/patches")
+  rapids_cpm_package_override("${cudf_patch_dir}/thrust_override.json")
 
-  if(Thrust_SOURCE_DIR) # only install thrust when we have an in-source version
-    include(GNUInstallDirs)
-    install(
-      DIRECTORY "${Thrust_SOURCE_DIR}/thrust"
-      DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/libcudf/Thrust/"
-      FILES_MATCHING
-      REGEX "\\.(h|inl)$"
-    )
-    install(
-      DIRECTORY "${Thrust_SOURCE_DIR}/dependencies/cub/cub"
-      DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/libcudf/Thrust/dependencies/"
-      FILES_MATCHING
-      PATTERN "*.cuh"
-    )
+  # Make sure we install thrust into the `include/libcudf` subdirectory instead of the default
+  include(GNUInstallDirs)
+  set(CMAKE_INSTALL_INCLUDEDIR "${CMAKE_INSTALL_INCLUDEDIR}/libcudf")
+  set(CMAKE_INSTALL_LIBDIR "${CMAKE_INSTALL_INCLUDEDIR}/lib")
 
-    install(DIRECTORY "${Thrust_SOURCE_DIR}/thrust/cmake"
-            DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/libcudf/Thrust/thrust/"
-    )
-    install(DIRECTORY "${Thrust_SOURCE_DIR}/dependencies/cub/cub/cmake"
-            DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/libcudf/Thrust/dependencies/cub/"
-    )
+  # Find or install Thrust with our custom set of patches
+  rapids_cpm_thrust(
+    NAMESPACE cudf
+    BUILD_EXPORT_SET cudf-exports
+    INSTALL_EXPORT_SET cudf-exports
+  )
 
+  if(Thrust_SOURCE_DIR)
     # Store where CMake can find our custom Thrust install
     include("${rapids-cmake-dir}/export/find_package_root.cmake")
     rapids_export_find_package_root(
-      INSTALL Thrust [=[${CMAKE_CURRENT_LIST_DIR}/../../../include/libcudf/Thrust/]=] cudf-exports
+      INSTALL Thrust [=[${CMAKE_CURRENT_LIST_DIR}/../../../include/libcudf/lib/cmake/thrust]=]
+      cudf-exports
     )
   endif()
 endfunction()
 
-set(CUDF_MIN_VERSION_Thrust 1.17.2)
-
-find_and_configure_thrust(${CUDF_MIN_VERSION_Thrust})
+find_and_configure_thrust()
diff --git a/cpp/cmake/thirdparty/patches/thrust_disable_64bit_dispatching.diff b/cpp/cmake/thirdparty/patches/thrust_disable_64bit_dispatching.diff
new file mode 100644
index 00000000000..382f7dca1b0
--- /dev/null
+++ b/cpp/cmake/thirdparty/patches/thrust_disable_64bit_dispatching.diff
@@ -0,0 +1,29 @@
+diff --git a/thrust/system/cuda/detail/dispatch.h b/thrust/system/cuda/detail/dispatch.h
+index d0e3f94..76774b0 100644
+--- a/thrust/system/cuda/detail/dispatch.h
++++ b/thrust/system/cuda/detail/dispatch.h
+@@ -32,9 +32,8 @@
+         status = call arguments; \
+     } \
+     else { \
+-        auto THRUST_PP_CAT2(count, _fixed) = static_cast<thrust::detail::int64_t>(count); \
+-        status = call arguments; \
+-    }
++       throw std::runtime_error("THRUST_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \
++    }
+
+ /**
+  * Dispatch between 32-bit and 64-bit index based versions of the same algorithm
+@@ -52,10 +51,8 @@
+         status = call arguments; \
+     } \
+     else { \
+-        auto THRUST_PP_CAT2(count1, _fixed) = static_cast<thrust::detail::int64_t>(count1); \
+-        auto THRUST_PP_CAT2(count2, _fixed) = static_cast<thrust::detail::int64_t>(count2); \
+-        status = call arguments; \
+-    }
++       throw std::runtime_error("THRUST_DOUBLE_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \
++    }
+ /**
+  * Dispatch between 32-bit and 64-bit index based versions of the same algorithm
+  * implementation. This version allows using different token sequences for callables
diff --git a/cpp/cmake/thirdparty/patches/thrust_faster_scan_compile_times.diff b/cpp/cmake/thirdparty/patches/thrust_faster_scan_compile_times.diff
new file mode 100644
index 00000000000..6bf165805cc
--- /dev/null
+++ b/cpp/cmake/thirdparty/patches/thrust_faster_scan_compile_times.diff
@@ -0,0 +1,39 @@
+diff --git a/dependencies/cub/cub/device/dispatch/dispatch_radix_sort.cuh b/dependencies/cub/cub/device/dispatch/dispatch_radix_sort.cuh
+index b188c75f..3f36656f 100644
+--- a/dependencies/cub/cub/device/dispatch/dispatch_radix_sort.cuh
++++ b/dependencies/cub/cub/device/dispatch/dispatch_radix_sort.cuh
+@@ -736,7 +736,7 @@ struct DeviceRadixSortPolicy
+
+
+     /// SM60 (GP100)
+-    struct Policy600 : ChainedPolicy<600, Policy600, Policy500>
++    struct Policy600 : ChainedPolicy<600, Policy600, Policy600>
+     {
+         enum {
+             PRIMARY_RADIX_BITS      = (sizeof(KeyT) > 1) ? 7 : 5,    // 6.9B 32b keys/s (Quadro P100)
+diff --git a/dependencies/cub/cub/device/dispatch/dispatch_reduce.cuh b/dependencies/cub/cub/device/dispatch/dispatch_reduce.cuh
+index e0470ccb..6a0c2ed6 100644
+--- a/dependencies/cub/cub/device/dispatch/dispatch_reduce.cuh
++++ b/dependencies/cub/cub/device/dispatch/dispatch_reduce.cuh
+@@ -280,7 +280,7 @@ struct DeviceReducePolicy
+     };
+
+     /// SM60
+-    struct Policy600 : ChainedPolicy<600, Policy600, Policy350>
++    struct Policy600 : ChainedPolicy<600, Policy600, Policy600>
+     {
+         // ReducePolicy (P100: 591 GB/s @ 64M 4B items; 583 GB/s @ 256M 1B items)
+         typedef AgentReducePolicy<
+diff --git a/dependencies/cub/cub/device/dispatch/dispatch_scan.cuh b/dependencies/cub/cub/device/dispatch/dispatch_scan.cuh
+index c2d04588..ac2d10e0 100644
+--- a/dependencies/cub/cub/device/dispatch/dispatch_scan.cuh
++++ b/dependencies/cub/cub/device/dispatch/dispatch_scan.cuh
+@@ -177,7 +177,7 @@ struct DeviceScanPolicy
+     };
+
+     /// SM600
+-    struct Policy600 : ChainedPolicy<600, Policy600, Policy520>
++    struct Policy600 : ChainedPolicy<600, Policy600, Policy600>
+     {
+         typedef AgentScanPolicy<
+                 128, 15,                                        ///< Threads per block, items per thread
diff --git a/cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times.diff b/cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times.diff
new file mode 100644
index 00000000000..e5d62e87ca4
--- /dev/null
+++ b/cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times.diff
@@ -0,0 +1,49 @@
+diff --git a/dependencies/cub/cub/block/block_merge_sort.cuh b/dependencies/cub/cub/block/block_merge_sort.cuh
+index 4769df36..d86d6342 100644
+--- a/dependencies/cub/cub/block/block_merge_sort.cuh
++++ b/dependencies/cub/cub/block/block_merge_sort.cuh
+@@ -91,7 +91,7 @@ __device__ __forceinline__ void SerialMerge(KeyT *keys_shared,
+   KeyT key1 = keys_shared[keys1_beg];
+   KeyT key2 = keys_shared[keys2_beg];
+
+-#pragma unroll
++#pragma unroll 1
+   for (int item = 0; item < ITEMS_PER_THREAD; ++item)
+   {
+     bool p = (keys2_beg < keys2_end) &&
+@@ -383,7 +383,7 @@ public:
+       //
+       KeyT max_key = oob_default;
+
+-      #pragma unroll
++      #pragma unroll 1
+       for (int item = 1; item < ITEMS_PER_THREAD; ++item)
+       {
+         if (ITEMS_PER_THREAD * linear_tid + item < valid_items)
+@@ -407,7 +407,7 @@ public:
+     // each thread has sorted keys
+     // merge sort keys in shared memory
+     //
+-    #pragma unroll
++    #pragma unroll 1
+     for (int target_merged_threads_number = 2;
+          target_merged_threads_number <= NUM_THREADS;
+          target_merged_threads_number *= 2)
+diff --git a/dependencies/cub/cub/thread/thread_sort.cuh b/dependencies/cub/cub/thread/thread_sort.cuh
+index 5d486789..b42fb5f0 100644
+--- a/dependencies/cub/cub/thread/thread_sort.cuh
++++ b/dependencies/cub/cub/thread/thread_sort.cuh
+@@ -83,10 +83,10 @@ StableOddEvenSort(KeyT (&keys)[ITEMS_PER_THREAD],
+ {
+   constexpr bool KEYS_ONLY = std::is_same<ValueT, NullType>::value;
+
+-  #pragma unroll
++  #pragma unroll 1
+   for (int i = 0; i < ITEMS_PER_THREAD; ++i)
+   {
+-  #pragma unroll
++  #pragma unroll 1
+     for (int j = 1 & i; j < ITEMS_PER_THREAD - 1; j += 2)
+     {
+       if (compare_op(keys[j + 1], keys[j]))
+       
diff --git a/cpp/cmake/thirdparty/patches/thrust_override.json b/cpp/cmake/thirdparty/patches/thrust_override.json
new file mode 100644
index 00000000000..f1908a64719
--- /dev/null
+++ b/cpp/cmake/thirdparty/patches/thrust_override.json
@@ -0,0 +1,34 @@
+
+{
+  "packages" : {
+    "Thrust" : {
+      "patches" : [
+        {
+          "file" : "Thrust/install_rules.diff",
+          "issue" : "Thrust 1.X installs incorrect files [https://github.com/NVIDIA/thrust/issues/1790]",
+          "fixed_in" : "2.0.0"
+        },
+        {
+          "file" : "${current_json_dir}/thrust_transform_iter_with_reduce_by_key.diff",
+          "issue" : "Support transform_output_iterator as output of reduce by key [https://github.com/NVIDIA/thrust/pull/1805]",
+          "fixed_in" : "2.1"
+        },
+        {
+          "file" : "${current_json_dir}/thrust_disable_64bit_dispatching.diff",
+          "issue" : "Remove 64bit dispatching as not needed by libcudf and results in compiling twice as many kernels [https://github.com/rapidsai/cudf/pull/11437]",
+          "fixed_in" : ""
+        },
+        {
+          "file" : "${current_json_dir}/thrust_faster_sort_compile_times.diff",
+          "issue" : "Improve Thrust sort compile times by not unrolling loops for inlined comparators [https://github.com/rapidsai/cudf/pull/10577]",
+          "fixed_in" : ""
+        },
+        {
+          "file" : "${current_json_dir}/thrust_faster_scan_compile_times.diff",
+          "issue" : "Improve Thrust scan compile times by reducing the number of kernels generated [https://github.com/rapidsai/cudf/pull/8183]",
+          "fixed_in" : ""
+        }
+      ]
+    }
+  }
+}
diff --git a/cpp/cmake/thirdparty/patches/thrust_transform_iter_with_reduce_by_key.diff b/cpp/cmake/thirdparty/patches/thrust_transform_iter_with_reduce_by_key.diff
new file mode 100644
index 00000000000..035da3ef385
--- /dev/null
+++ b/cpp/cmake/thirdparty/patches/thrust_transform_iter_with_reduce_by_key.diff
@@ -0,0 +1,26 @@
+diff --git a/thrust/iterator/transform_input_output_iterator.h b/thrust/iterator/transform_input_output_iterator.h
+index f512a36..a5f725d 100644
+--- a/thrust/iterator/transform_input_output_iterator.h
++++ b/thrust/iterator/transform_input_output_iterator.h
+@@ -102,6 +102,8 @@ template <typename InputFunction, typename OutputFunction, typename Iterator>
+   /*! \endcond
+    */
+ 
++  transform_input_output_iterator() = default;
++
+   /*! This constructor takes as argument a \c Iterator an \c InputFunction and an
+    * \c OutputFunction and copies them to a new \p transform_input_output_iterator
+    *
+diff --git a/thrust/iterator/transform_output_iterator.h b/thrust/iterator/transform_output_iterator.h
+index 66fb46a..4a68cb5 100644
+--- a/thrust/iterator/transform_output_iterator.h
++++ b/thrust/iterator/transform_output_iterator.h
+@@ -104,6 +104,8 @@ template <typename UnaryFunction, typename OutputIterator>
+   /*! \endcond
+    */
+ 
++  transform_output_iterator() = default;
++
+   /*! This constructor takes as argument an \c OutputIterator and an \c
+    * UnaryFunction and copies them to a new \p transform_output_iterator
+    *
diff --git a/cpp/cmake/thrust.patch b/cpp/cmake/thrust.patch
deleted file mode 100644
index 0dd9854d4aa..00000000000
--- a/cpp/cmake/thrust.patch
+++ /dev/null
@@ -1,142 +0,0 @@
-diff --git a/cub/block/block_merge_sort.cuh b/cub/block/block_merge_sort.cuh
-index 4769df36..d86d6342 100644
---- a/cub/block/block_merge_sort.cuh
-+++ b/cub/block/block_merge_sort.cuh
-@@ -91,7 +91,7 @@ __device__ __forceinline__ void SerialMerge(KeyT *keys_shared,
-   KeyT key1 = keys_shared[keys1_beg];
-   KeyT key2 = keys_shared[keys2_beg];
-
--#pragma unroll
-+#pragma unroll 1
-   for (int item = 0; item < ITEMS_PER_THREAD; ++item)
-   {
-     bool p = (keys2_beg < keys2_end) &&
-@@ -383,7 +383,7 @@ public:
-       //
-       KeyT max_key = oob_default;
-
--      #pragma unroll
-+      #pragma unroll 1
-       for (int item = 1; item < ITEMS_PER_THREAD; ++item)
-       {
-         if (ITEMS_PER_THREAD * linear_tid + item < valid_items)
-@@ -407,7 +407,7 @@ public:
-     // each thread has sorted keys
-     // merge sort keys in shared memory
-     //
--    #pragma unroll
-+    #pragma unroll 1
-     for (int target_merged_threads_number = 2;
-          target_merged_threads_number <= NUM_THREADS;
-          target_merged_threads_number *= 2)
-diff --git a/cub/device/dispatch/dispatch_radix_sort.cuh b/cub/device/dispatch/dispatch_radix_sort.cuh
-index b188c75f..3f36656f 100644
---- a/cub/device/dispatch/dispatch_radix_sort.cuh
-+++ b/cub/device/dispatch/dispatch_radix_sort.cuh
-@@ -736,7 +736,7 @@ struct DeviceRadixSortPolicy
-
-
-     /// SM60 (GP100)
--    struct Policy600 : ChainedPolicy<600, Policy600, Policy500>
-+    struct Policy600 : ChainedPolicy<600, Policy600, Policy600>
-     {
-         enum {
-             PRIMARY_RADIX_BITS      = (sizeof(KeyT) > 1) ? 7 : 5,    // 6.9B 32b keys/s (Quadro P100)
-diff --git a/cub/device/dispatch/dispatch_reduce.cuh b/cub/device/dispatch/dispatch_reduce.cuh
-index e0470ccb..6a0c2ed6 100644
---- a/cub/device/dispatch/dispatch_reduce.cuh
-+++ b/cub/device/dispatch/dispatch_reduce.cuh
-@@ -280,7 +280,7 @@ struct DeviceReducePolicy
-     };
-
-     /// SM60
--    struct Policy600 : ChainedPolicy<600, Policy600, Policy350>
-+    struct Policy600 : ChainedPolicy<600, Policy600, Policy600>
-     {
-         // ReducePolicy (P100: 591 GB/s @ 64M 4B items; 583 GB/s @ 256M 1B items)
-         typedef AgentReducePolicy<
-diff --git a/cub/device/dispatch/dispatch_scan.cuh b/cub/device/dispatch/dispatch_scan.cuh
-index c2d04588..ac2d10e0 100644
---- a/cub/device/dispatch/dispatch_scan.cuh
-+++ b/cub/device/dispatch/dispatch_scan.cuh
-@@ -177,7 +177,7 @@ struct DeviceScanPolicy
-     };
-
-     /// SM600
--    struct Policy600 : ChainedPolicy<600, Policy600, Policy520>
-+    struct Policy600 : ChainedPolicy<600, Policy600, Policy600>
-     {
-         typedef AgentScanPolicy<
-                 128, 15,                                        ///< Threads per block, items per thread
-diff --git a/cub/thread/thread_sort.cuh b/cub/thread/thread_sort.cuh
-index 5d486789..b42fb5f0 100644
---- a/cub/thread/thread_sort.cuh
-+++ b/cub/thread/thread_sort.cuh
-@@ -83,10 +83,10 @@ StableOddEvenSort(KeyT (&keys)[ITEMS_PER_THREAD],
- {
-   constexpr bool KEYS_ONLY = std::is_same<ValueT, NullType>::value;
-
--  #pragma unroll
-+  #pragma unroll 1
-   for (int i = 0; i < ITEMS_PER_THREAD; ++i)
-   {
--  #pragma unroll
-+  #pragma unroll 1
-     for (int j = 1 & i; j < ITEMS_PER_THREAD - 1; j += 2)
-     {
-       if (compare_op(keys[j + 1], keys[j]))
-diff --git a/thrust/system/cuda/detail/dispatch.h b/thrust/system/cuda/detail/dispatch.h
-index d0e3f94..76774b0 100644
---- a/thrust/system/cuda/detail/dispatch.h
-+++ b/thrust/system/cuda/detail/dispatch.h
-@@ -32,9 +32,8 @@
-         status = call arguments; \
-     } \
-     else { \
--        auto THRUST_PP_CAT2(count, _fixed) = static_cast<thrust::detail::int64_t>(count); \
--        status = call arguments; \
--    }
-+       throw std::runtime_error("THRUST_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \
-+    }
-
- /**
-  * Dispatch between 32-bit and 64-bit index based versions of the same algorithm
-@@ -52,10 +51,8 @@
-         status = call arguments; \
-     } \
-     else { \
--        auto THRUST_PP_CAT2(count1, _fixed) = static_cast<thrust::detail::int64_t>(count1); \
--        auto THRUST_PP_CAT2(count2, _fixed) = static_cast<thrust::detail::int64_t>(count2); \
--        status = call arguments; \
--    }
-+       throw std::runtime_error("THRUST_DOUBLE_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \
-+    }
- /**
-  * Dispatch between 32-bit and 64-bit index based versions of the same algorithm
-  * implementation. This version allows using different token sequences for callables
-diff --git a/thrust/iterator/transform_input_output_iterator.h b/thrust/iterator/transform_input_output_iterator.h
-index f512a36..a5f725d 100644
---- a/thrust/iterator/transform_input_output_iterator.h
-+++ b/thrust/iterator/transform_input_output_iterator.h
-@@ -102,6 +102,8 @@ template <typename InputFunction, typename OutputFunction, typename Iterator>
-   /*! \endcond
-    */
- 
-+  transform_input_output_iterator() = default;
-+
-   /*! This constructor takes as argument a \c Iterator an \c InputFunction and an
-    * \c OutputFunction and copies them to a new \p transform_input_output_iterator
-    *
-diff --git a/thrust/iterator/transform_output_iterator.h b/thrust/iterator/transform_output_iterator.h
-index 66fb46a..4a68cb5 100644
---- a/thrust/iterator/transform_output_iterator.h
-+++ b/thrust/iterator/transform_output_iterator.h
-@@ -104,6 +104,8 @@ template <typename UnaryFunction, typename OutputIterator>
-   /*! \endcond
-    */
- 
-+  transform_output_iterator() = default;
-+
-   /*! This constructor takes as argument an \c OutputIterator and an \c
-    * UnaryFunction and copies them to a new \p transform_output_iterator
-    *

From 4c0f2fd7e68b618075c63d379c16499821faed30 Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Mon, 24 Oct 2022 13:24:17 -0400
Subject: [PATCH 063/202] Fix lists and structs gtests coded in namespace
 cudf::test (#11956)

Fixes the structs and lists gtests source files that were coded inside namespace `cudf::test`.
These are the only 2 problem files of this kind in `cpp/tests/structs` and `cpp/tests/lists`, so this change makes those two directories complete.
No function or test has changed; the source code has only been reworked to use explicit namespace qualification.

Reference #11734

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Vukasin Milovanovic (https://github.com/vuule)
  - Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/11956
---
 cpp/tests/lists/contains_tests.cpp    | 620 +++++++++++++-------------
 cpp/tests/structs/utilities_tests.cpp | 373 +++++++++-------
 2 files changed, 521 insertions(+), 472 deletions(-)

diff --git a/cpp/tests/lists/contains_tests.cpp b/cpp/tests/lists/contains_tests.cpp
index a93ef4f8b1d..1658843f1a4 100644
--- a/cpp/tests/lists/contains_tests.cpp
+++ b/cpp/tests/lists/contains_tests.cpp
@@ -26,40 +26,37 @@
 #include <cudf_test/iterator_utilities.hpp>
 #include <cudf_test/type_lists.hpp>
 
-namespace cudf {
-namespace test {
-
 namespace {
 template <typename T, std::enable_if_t<cudf::is_numeric<T>(), void>* = nullptr>
 auto create_scalar_search_key(T const& value)
 {
-  auto search_key = make_numeric_scalar(data_type{type_to_id<T>()});
+  auto search_key = cudf::make_numeric_scalar(cudf::data_type{cudf::type_to_id<T>()});
   search_key->set_valid_async(true);
-  static_cast<scalar_type_t<T>*>(search_key.get())->set_value(value);
+  static_cast<cudf::scalar_type_t<T>*>(search_key.get())->set_value(value);
   return search_key;
 }
 
 template <typename T, std::enable_if_t<std::is_same_v<T, std::string>, void>* = nullptr>
 auto create_scalar_search_key(std::string const& value)
 {
-  return make_string_scalar(value);
+  return cudf::make_string_scalar(value);
 }
 
 template <typename T, std::enable_if_t<cudf::is_timestamp<T>(), void>* = nullptr>
 auto create_scalar_search_key(typename T::rep const& value)
 {
-  auto search_key = make_timestamp_scalar(data_type{type_to_id<T>()});
+  auto search_key = cudf::make_timestamp_scalar(cudf::data_type{cudf::type_to_id<T>()});
   search_key->set_valid_async(true);
-  static_cast<scalar_type_t<typename T::rep>*>(search_key.get())->set_value(value);
+  static_cast<cudf::scalar_type_t<typename T::rep>*>(search_key.get())->set_value(value);
   return search_key;
 }
 
 template <typename T, std::enable_if_t<cudf::is_duration<T>(), void>* = nullptr>
 auto create_scalar_search_key(typename T::rep const& value)
 {
-  auto search_key = make_duration_scalar(data_type{type_to_id<T>()});
+  auto search_key = cudf::make_duration_scalar(cudf::data_type{cudf::type_to_id<T>()});
   search_key->set_valid_async(true);
-  static_cast<scalar_type_t<typename T::rep>*>(search_key.get())->set_value(value);
+  static_cast<cudf::scalar_type_t<typename T::rep>*>(search_key.get())->set_value(value);
   return search_key;
 }
 
@@ -72,7 +69,7 @@ auto make_struct_scalar(Args&&... args)
 template <typename T, std::enable_if_t<cudf::is_numeric<T>(), void>* = nullptr>
 auto create_null_search_key()
 {
-  auto search_key = make_numeric_scalar(data_type{type_to_id<T>()});
+  auto search_key = cudf::make_numeric_scalar(cudf::data_type{cudf::type_to_id<T>()});
   search_key->set_valid_async(false);
   return search_key;
 }
@@ -80,7 +77,7 @@ auto create_null_search_key()
 template <typename T, std::enable_if_t<cudf::is_timestamp<T>(), void>* = nullptr>
 auto create_null_search_key()
 {
-  auto search_key = make_timestamp_scalar(data_type{type_to_id<T>()});
+  auto search_key = cudf::make_timestamp_scalar(cudf::data_type{cudf::type_to_id<T>()});
   search_key->set_valid_async(false);
   return search_key;
 }
@@ -88,30 +85,29 @@ auto create_null_search_key()
 template <typename T, std::enable_if_t<cudf::is_duration<T>(), void>* = nullptr>
 auto create_null_search_key()
 {
-  auto search_key = make_duration_scalar(data_type{type_to_id<T>()});
+  auto search_key = cudf::make_duration_scalar(cudf::data_type{cudf::type_to_id<T>()});
   search_key->set_valid_async(false);
   return search_key;
 }
 
 }  // namespace
 
-auto constexpr X          = int32_t{0};     // Placeholder for nulls.
-auto constexpr ABSENT     = size_type{-1};  // Index when key is not found in a list.
-auto constexpr FIND_FIRST = lists::duplicate_find_option::FIND_FIRST;
-auto constexpr FIND_LAST  = lists::duplicate_find_option::FIND_LAST;
+auto constexpr X          = int32_t{0};           // Placeholder for nulls.
+auto constexpr ABSENT     = cudf::size_type{-1};  // Index when key is not found in a list.
+auto constexpr FIND_FIRST = cudf::lists::duplicate_find_option::FIND_FIRST;
+auto constexpr FIND_LAST  = cudf::lists::duplicate_find_option::FIND_LAST;
 
 using bools_col   = cudf::test::fixed_width_column_wrapper<bool, int32_t>;
-using indices_col = cudf::test::fixed_width_column_wrapper<size_type>;
-using structs_col = cudf::test::structs_column_wrapper;
-using strings_col = cudf::test::strings_column_wrapper;
+using indices_col = cudf::test::fixed_width_column_wrapper<cudf::size_type>;
 
-using iterators::all_nulls;
-using iterators::null_at;
-using iterators::nulls_at;
+using cudf::test::iterators::all_nulls;
+using cudf::test::iterators::null_at;
+using cudf::test::iterators::nulls_at;
 
-using ContainsTestTypes = Concat<IntegralTypesNotBool, FloatingPointTypes, ChronoTypes>;
+using ContainsTestTypes = cudf::test::
+  Concat<cudf::test::IntegralTypesNotBool, cudf::test::FloatingPointTypes, cudf::test::ChronoTypes>;
 
-struct ContainsTest : public BaseFixture {
+struct ContainsTest : public cudf::test::BaseFixture {
 };
 
 template <typename T>
@@ -124,40 +120,40 @@ TYPED_TEST(TypedContainsTest, ScalarKeyWithNoNulls)
 {
   using T = TypeParam;
 
-  auto const search_space_col = lists_column_wrapper<T, int32_t>{{0, 1, 2, 1},
-                                                                 {3, 4, 5},
-                                                                 {6, 7, 8},
-                                                                 {9, 0, 1, 3, 1},
-                                                                 {2, 3, 4},
-                                                                 {5, 6, 7},
-                                                                 {8, 9, 0},
-                                                                 {},
-                                                                 {1, 2, 1, 3},
-                                                                 {}};
-  auto const search_space     = lists_column_view{search_space_col};
+  auto const search_space_col = cudf::test::lists_column_wrapper<T, int32_t>{{0, 1, 2, 1},
+                                                                             {3, 4, 5},
+                                                                             {6, 7, 8},
+                                                                             {9, 0, 1, 3, 1},
+                                                                             {2, 3, 4},
+                                                                             {5, 6, 7},
+                                                                             {8, 9, 0},
+                                                                             {},
+                                                                             {1, 2, 1, 3},
+                                                                             {}};
+  auto const search_space     = cudf::lists_column_view{search_space_col};
   auto search_key_one         = create_scalar_search_key<T>(1);
 
   {
     // CONTAINS
-    auto result   = lists::contains(search_space, *search_key_one);
+    auto result   = cudf::lists::contains(search_space, *search_key_one);
     auto expected = bools_col{1, 0, 0, 1, 0, 0, 0, 0, 1, 0};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // CONTAINS NULLS
-    auto result   = lists::contains_nulls(search_space);
+    auto result   = cudf::lists::contains_nulls(search_space);
     auto expected = bools_col{0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_FIRST
-    auto result   = lists::index_of(search_space, *search_key_one, FIND_FIRST);
+    auto result   = cudf::lists::index_of(search_space, *search_key_one, FIND_FIRST);
     auto expected = indices_col{1, ABSENT, ABSENT, 2, ABSENT, ABSENT, ABSENT, ABSENT, 0, ABSENT};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_LAST
-    auto result   = lists::index_of(search_space, *search_key_one, FIND_LAST);
+    auto result   = cudf::lists::index_of(search_space, *search_key_one, FIND_LAST);
     auto expected = indices_col{3, ABSENT, ABSENT, 4, ABSENT, ABSENT, ABSENT, ABSENT, 2, ABSENT};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
@@ -168,42 +164,42 @@ TYPED_TEST(TypedContainsTest, ScalarKeyWithNullLists)
   // Test List columns that have NULL list rows.
   using T = TypeParam;
 
-  auto const search_space_col = lists_column_wrapper<T, int32_t>{{{0, 1, 2, 1},
-                                                                  {3, 4, 5},
-                                                                  {6, 7, 8},
-                                                                  {},
-                                                                  {9, 0, 1, 3, 1},
-                                                                  {2, 3, 4},
-                                                                  {5, 6, 7},
-                                                                  {8, 9, 0},
-                                                                  {},
-                                                                  {1, 2, 2, 3},
-                                                                  {}},
-                                                                 nulls_at({3, 10})};
-  auto const search_space     = lists_column_view{search_space_col};
+  auto const search_space_col = cudf::test::lists_column_wrapper<T, int32_t>{{{0, 1, 2, 1},
+                                                                              {3, 4, 5},
+                                                                              {6, 7, 8},
+                                                                              {},
+                                                                              {9, 0, 1, 3, 1},
+                                                                              {2, 3, 4},
+                                                                              {5, 6, 7},
+                                                                              {8, 9, 0},
+                                                                              {},
+                                                                              {1, 2, 2, 3},
+                                                                              {}},
+                                                                             nulls_at({3, 10})};
+  auto const search_space     = cudf::lists_column_view{search_space_col};
   auto search_key_one         = create_scalar_search_key<T>(1);
   {
     // CONTAINS
-    auto result   = lists::contains(search_space, *search_key_one);
+    auto result   = cudf::lists::contains(search_space, *search_key_one);
     auto expected = bools_col{{1, 0, 0, X, 1, 0, 0, 0, 0, 1, X}, nulls_at({3, 10})};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // CONTAINS NULLS
-    auto result   = lists::contains_nulls(search_space);
+    auto result   = cudf::lists::contains_nulls(search_space);
     auto expected = bools_col{{0, 0, 0, X, 0, 0, 0, 0, 0, 0, X}, nulls_at({3, 10})};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_FIRST
-    auto result   = lists::index_of(search_space, *search_key_one, FIND_FIRST);
+    auto result   = cudf::lists::index_of(search_space, *search_key_one, FIND_FIRST);
     auto expected = indices_col{{1, ABSENT, ABSENT, X, 2, ABSENT, ABSENT, ABSENT, ABSENT, 0, X},
                                 nulls_at({3, 10})};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_LAST
-    auto result   = lists::index_of(search_space, *search_key_one, FIND_LAST);
+    auto result   = cudf::lists::index_of(search_space, *search_key_one, FIND_LAST);
     auto expected = indices_col{{3, ABSENT, ABSENT, X, 4, ABSENT, ABSENT, ABSENT, ABSENT, 0, X},
                                 nulls_at({3, 10})};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
@@ -216,18 +212,18 @@ TYPED_TEST(TypedContainsTest, SlicedLists)
   using namespace cudf;
   using T = TypeParam;
 
-  auto search_space = lists_column_wrapper<T, int32_t>{{{0, 1, 2, 1},
-                                                        {3, 4, 5},
-                                                        {6, 7, 8},
-                                                        {},
-                                                        {9, 0, 1, 3, 1},
-                                                        {2, 3, 4},
-                                                        {5, 6, 7},
-                                                        {8, 9, 0},
-                                                        {},
-                                                        {1, 2, 1, 3},
-                                                        {}},
-                                                       nulls_at({3, 10})};
+  auto search_space = cudf::test::lists_column_wrapper<T, int32_t>{{{0, 1, 2, 1},
+                                                                    {3, 4, 5},
+                                                                    {6, 7, 8},
+                                                                    {},
+                                                                    {9, 0, 1, 3, 1},
+                                                                    {2, 3, 4},
+                                                                    {5, 6, 7},
+                                                                    {8, 9, 0},
+                                                                    {},
+                                                                    {1, 2, 1, 3},
+                                                                    {}},
+                                                                   nulls_at({3, 10})};
 
   {
     // First Slice.
@@ -235,26 +231,26 @@ TYPED_TEST(TypedContainsTest, SlicedLists)
     auto search_key_one  = create_scalar_search_key<T>(1);
     {
       // CONTAINS
-      auto result          = lists::contains(sliced_column_1, *search_key_one);
+      auto result          = cudf::lists::contains(sliced_column_1, *search_key_one);
       auto expected_result = bools_col{{0, 0, X, 1, 0, 0, 0}, null_at(2)};
       CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, result->view());
     }
     {
       // CONTAINS NULLS
-      auto result          = lists::contains_nulls(sliced_column_1);
+      auto result          = cudf::lists::contains_nulls(sliced_column_1);
       auto expected_result = bools_col{{0, 0, X, 0, 0, 0, 0}, null_at(2)};
       CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, result->view());
     }
     {
       // FIND_FIRST
-      auto result = lists::index_of(sliced_column_1, *search_key_one, FIND_FIRST);
+      auto result = cudf::lists::index_of(sliced_column_1, *search_key_one, FIND_FIRST);
       auto expected_result =
         indices_col{{ABSENT, ABSENT, 0, 2, ABSENT, ABSENT, ABSENT}, null_at(2)};
       CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, result->view());
     }
     {
       // FIND_LAST
-      auto result = lists::index_of(sliced_column_1, *search_key_one, FIND_LAST);
+      auto result = cudf::lists::index_of(sliced_column_1, *search_key_one, FIND_LAST);
       auto expected_result =
         indices_col{{ABSENT, ABSENT, 0, 4, ABSENT, ABSENT, ABSENT}, null_at(2)};
       CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, result->view());
@@ -267,25 +263,25 @@ TYPED_TEST(TypedContainsTest, SlicedLists)
     auto search_key_one  = create_scalar_search_key<T>(1);
     {
       // CONTAINS
-      auto result          = lists::contains(sliced_column_2, *search_key_one);
+      auto result          = cudf::lists::contains(sliced_column_2, *search_key_one);
       auto expected_result = bools_col{{X, 1, 0, 0, 0, 0, 1}, null_at(0)};
       CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, result->view());
     }
     {
       // CONTAINS NULLS
-      auto result          = lists::contains_nulls(sliced_column_2);
+      auto result          = cudf::lists::contains_nulls(sliced_column_2);
       auto expected_result = bools_col{{X, 0, 0, 0, 0, 0, 0}, null_at(0)};
       CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, result->view());
     }
     {
       // FIND_FIRST
-      auto result          = lists::index_of(sliced_column_2, *search_key_one, FIND_FIRST);
+      auto result          = cudf::lists::index_of(sliced_column_2, *search_key_one, FIND_FIRST);
       auto expected_result = indices_col{{0, 2, ABSENT, ABSENT, ABSENT, ABSENT, 0}, null_at(0)};
       CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, result->view());
     }
     {
       // FIND_LAST
-      auto result          = lists::index_of(sliced_column_2, *search_key_one, FIND_LAST);
+      auto result          = cudf::lists::index_of(sliced_column_2, *search_key_one, FIND_LAST);
       auto expected_result = indices_col{{0, 4, ABSENT, ABSENT, ABSENT, ABSENT, 2}, null_at(0)};
       CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected_result, result->view());
     }
@@ -297,33 +293,33 @@ TYPED_TEST(TypedContainsTest, ScalarKeyNonNullListsWithNullValues)
   // Test List columns that have no NULL list rows, but NULL elements in some list rows.
   using T = TypeParam;
 
-  auto numerals     = fixed_width_column_wrapper<T>{{X, 1, 2, X, 4, 5, X, 7, 8, X, X, 1, 2, X, 1},
-                                                nulls_at({0, 3, 6, 9, 10, 13})};
-  auto search_space = make_lists_column(
+  auto numerals = cudf::test::fixed_width_column_wrapper<T>{
+    {X, 1, 2, X, 4, 5, X, 7, 8, X, X, 1, 2, X, 1}, nulls_at({0, 3, 6, 9, 10, 13})};
+  auto search_space = cudf::make_lists_column(
     8, indices_col{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), numerals.release(), 0, {});
   // Search space: [ [x], [1,2], [x,4,5,x], [], [], [7,8,x], [x], [1,2,x,1] ]
   auto search_key_one = create_scalar_search_key<T>(1);
   {
     // CONTAINS
-    auto result   = lists::contains(search_space->view(), *search_key_one);
+    auto result   = cudf::lists::contains(search_space->view(), *search_key_one);
     auto expected = bools_col{0, 1, 0, 0, 0, 0, 0, 1};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // CONTAINS NULLS
-    auto result   = lists::contains_nulls(search_space->view());
+    auto result   = cudf::lists::contains_nulls(search_space->view());
     auto expected = bools_col{1, 0, 1, 0, 0, 1, 1, 1};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_FIRST
-    auto result   = lists::index_of(search_space->view(), *search_key_one, FIND_FIRST);
+    auto result   = cudf::lists::index_of(search_space->view(), *search_key_one, FIND_FIRST);
     auto expected = indices_col{ABSENT, 0, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, 0};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_LAST
-    auto result   = lists::index_of(search_space->view(), *search_key_one, FIND_LAST);
+    auto result   = cudf::lists::index_of(search_space->view(), *search_key_one, FIND_LAST);
     auto expected = indices_col{ABSENT, 0, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, 3};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
@@ -333,11 +329,11 @@ TYPED_TEST(TypedContainsTest, ScalarKeysWithNullsInLists)
 {
   using T = TypeParam;
 
-  auto numerals = fixed_width_column_wrapper<T>{{X, 1, 2, X, 4, 5, X, 7, 8, X, X, 1, 2, X, 1},
-                                                nulls_at({0, 3, 6, 9, 10, 13})};
+  auto numerals = cudf::test::fixed_width_column_wrapper<T>{
+    {X, 1, 2, X, 4, 5, X, 7, 8, X, X, 1, 2, X, 1}, nulls_at({0, 3, 6, 9, 10, 13})};
   auto input_null_mask_iter = null_at(4);
 
-  auto search_space = make_lists_column(
+  auto search_space = cudf::make_lists_column(
     8,
     indices_col{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(),
     numerals.release(),
@@ -348,25 +344,25 @@ TYPED_TEST(TypedContainsTest, ScalarKeysWithNullsInLists)
   auto search_key_one = create_scalar_search_key<T>(1);
   {
     // CONTAINS.
-    auto result   = lists::contains(search_space->view(), *search_key_one);
+    auto result   = cudf::lists::contains(search_space->view(), *search_key_one);
     auto expected = bools_col{{0, 1, 0, 0, X, 0, 0, 1}, null_at(4)};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // CONTAINS NULLS.
-    auto result   = lists::contains_nulls(search_space->view());
+    auto result   = cudf::lists::contains_nulls(search_space->view());
     auto expected = bools_col{{1, 0, 1, 0, X, 1, 1, 1}, null_at(4)};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_FIRST.
-    auto result   = lists::index_of(search_space->view(), *search_key_one, FIND_FIRST);
+    auto result   = cudf::lists::index_of(search_space->view(), *search_key_one, FIND_FIRST);
     auto expected = indices_col{{ABSENT, 0, ABSENT, ABSENT, X, ABSENT, ABSENT, 0}, null_at(4)};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_LAST.
-    auto result   = lists::index_of(search_space->view(), *search_key_one, FIND_LAST);
+    auto result   = cudf::lists::index_of(search_space->view(), *search_key_one, FIND_LAST);
     auto expected = indices_col{{ABSENT, 0, ABSENT, ABSENT, X, ABSENT, ABSENT, 3}, null_at(4)};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
@@ -376,12 +372,12 @@ TEST_F(ContainsTest, BoolScalarWithNullsInLists)
 {
   using T = bool;
 
-  auto numerals = fixed_width_column_wrapper<T>{{X, 1, 1, X, 1, 1, X, 1, 1, X, X, 1, 1, X, 1},
-                                                nulls_at({0, 3, 6, 9, 10, 13})};
+  auto numerals = cudf::test::fixed_width_column_wrapper<T>{
+    {X, 1, 1, X, 1, 1, X, 1, 1, X, X, 1, 1, X, 1}, nulls_at({0, 3, 6, 9, 10, 13})};
   auto input_null_mask_iter = null_at(4);
-  auto search_space         = make_lists_column(
+  auto search_space         = cudf::make_lists_column(
     8,
-    fixed_width_column_wrapper<size_type>{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(),
+    cudf::test::fixed_width_column_wrapper<cudf::size_type>{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(),
     numerals.release(),
     1,
     cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8));
@@ -390,25 +386,25 @@ TEST_F(ContainsTest, BoolScalarWithNullsInLists)
   auto search_key_one = create_scalar_search_key<T>(1);
   {
     // CONTAINS
-    auto result   = lists::contains(search_space->view(), *search_key_one);
+    auto result   = cudf::lists::contains(search_space->view(), *search_key_one);
     auto expected = bools_col{{0, 1, 1, 0, X, 1, 0, 1}, null_at(4)};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // CONTAINS NULLS
-    auto result   = lists::contains_nulls(search_space->view());
+    auto result   = cudf::lists::contains_nulls(search_space->view());
     auto expected = bools_col{{1, 0, 1, 0, X, 1, 1, 1}, null_at(4)};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_FIRST.
-    auto result   = lists::index_of(search_space->view(), *search_key_one, FIND_FIRST);
+    auto result   = cudf::lists::index_of(search_space->view(), *search_key_one, FIND_FIRST);
     auto expected = indices_col{{ABSENT, 0, 1, ABSENT, X, 0, ABSENT, 0}, null_at(4)};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_LAST.
-    auto result   = lists::index_of(search_space->view(), *search_key_one, FIND_LAST);
+    auto result   = cudf::lists::index_of(search_space->view(), *search_key_one, FIND_LAST);
     auto expected = indices_col{{ABSENT, 1, 2, ABSENT, X, 1, ABSENT, 3}, null_at(4)};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
@@ -418,11 +414,11 @@ TEST_F(ContainsTest, StringScalarWithNullsInLists)
 {
   using T = std::string;
 
-  auto strings = strings_column_wrapper{
+  auto strings = cudf::test::strings_column_wrapper{
     {"X", "1", "2", "X", "4", "5", "X", "7", "8", "X", "X", "1", "2", "X", "1"},
     nulls_at({0, 3, 6, 9, 10, 13})};
   auto input_null_mask_iter = null_at(4);
-  auto search_space         = make_lists_column(
+  auto search_space         = cudf::make_lists_column(
     8,
     indices_col{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(),
     strings.release(),
@@ -433,25 +429,25 @@ TEST_F(ContainsTest, StringScalarWithNullsInLists)
   auto search_key_one = create_scalar_search_key<T>("1");
   {
     // CONTAINS
-    auto result   = lists::contains(search_space->view(), *search_key_one);
+    auto result   = cudf::lists::contains(search_space->view(), *search_key_one);
     auto expected = bools_col{{0, 1, 0, 0, X, 0, 0, 1}, null_at(4)};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // CONTAINS NULLS
-    auto result   = lists::contains_nulls(search_space->view());
+    auto result   = cudf::lists::contains_nulls(search_space->view());
     auto expected = bools_col{{1, 0, 1, 0, X, 1, 1, 1}, null_at(4)};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_FIRST.
-    auto result   = lists::index_of(search_space->view(), *search_key_one, FIND_FIRST);
+    auto result   = cudf::lists::index_of(search_space->view(), *search_key_one, FIND_FIRST);
     auto expected = indices_col{{ABSENT, 0, ABSENT, ABSENT, X, ABSENT, ABSENT, 0}, null_at(4)};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_LAST.
-    auto result   = lists::index_of(search_space->view(), *search_key_one, FIND_LAST);
+    auto result   = cudf::lists::index_of(search_space->view(), *search_key_one, FIND_LAST);
     auto expected = indices_col{{ABSENT, 0, ABSENT, ABSENT, X, ABSENT, ABSENT, 3}, null_at(4)};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
@@ -461,35 +457,35 @@ TYPED_TEST(TypedContainsTest, ScalarNullSearchKey)
 {
   using T = TypeParam;
 
-  auto search_space = lists_column_wrapper<T, int32_t>{{{0, 1, 2},
-                                                        {3, 4, 5},
-                                                        {6, 7, 8},
-                                                        {},
-                                                        {9, 0, 1},
-                                                        {2, 3, 4},
-                                                        {5, 6, 7},
-                                                        {8, 9, 0},
-                                                        {},
-                                                        {1, 2, 3},
-                                                        {}},
-                                                       nulls_at({3, 10})}
+  auto search_space = cudf::test::lists_column_wrapper<T, int32_t>{{{0, 1, 2},
+                                                                    {3, 4, 5},
+                                                                    {6, 7, 8},
+                                                                    {},
+                                                                    {9, 0, 1},
+                                                                    {2, 3, 4},
+                                                                    {5, 6, 7},
+                                                                    {8, 9, 0},
+                                                                    {},
+                                                                    {1, 2, 3},
+                                                                    {}},
+                                                                   nulls_at({3, 10})}
                         .release();
   auto search_key_null = create_null_search_key<T>();
   {
     // CONTAINS
-    auto result   = lists::contains(search_space->view(), *search_key_null);
+    auto result   = cudf::lists::contains(search_space->view(), *search_key_null);
     auto expected = bools_col{{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, all_nulls()};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_FIRST
-    auto result   = lists::index_of(search_space->view(), *search_key_null, FIND_FIRST);
+    auto result   = cudf::lists::index_of(search_space->view(), *search_key_null, FIND_FIRST);
     auto expected = indices_col{{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, all_nulls()};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_LAST
-    auto result   = lists::index_of(search_space->view(), *search_key_null, FIND_LAST);
+    auto result   = cudf::lists::index_of(search_space->view(), *search_key_null, FIND_LAST);
     auto expected = indices_col{{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, all_nulls()};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
@@ -499,28 +495,29 @@ TEST_F(ContainsTest, ScalarTypeRelatedExceptions)
 {
   {
     // Nested types unsupported.
-    auto list_of_lists = lists_column_wrapper<int32_t>{
+    auto list_of_lists = cudf::test::lists_column_wrapper<int32_t>{
       {{1, 2, 3}, {4, 5, 6}},
       {{1, 2, 3}, {4, 5, 6}},
       {{1, 2, 3},
        {4, 5, 6}}}.release();
     auto skey = create_scalar_search_key<int32_t>(10);
-    EXPECT_THROW(lists::contains(list_of_lists->view(), *skey), cudf::logic_error);
-    EXPECT_THROW(lists::index_of(list_of_lists->view(), *skey, FIND_FIRST), cudf::logic_error);
-    EXPECT_THROW(lists::index_of(list_of_lists->view(), *skey, FIND_LAST), cudf::logic_error);
+    EXPECT_THROW(cudf::lists::contains(list_of_lists->view(), *skey), cudf::logic_error);
+    EXPECT_THROW(cudf::lists::index_of(list_of_lists->view(), *skey, FIND_FIRST),
+                 cudf::logic_error);
+    EXPECT_THROW(cudf::lists::index_of(list_of_lists->view(), *skey, FIND_LAST), cudf::logic_error);
   }
   {
     // Search key must match list elements in type.
     auto list_of_ints =
-      lists_column_wrapper<int32_t>{
+      cudf::test::lists_column_wrapper<int32_t>{
         {0, 1, 2},
         {3, 4, 5},
       }
         .release();
     auto skey = create_scalar_search_key<std::string>("Hello, World!");
-    EXPECT_THROW(lists::contains(list_of_ints->view(), *skey), cudf::logic_error);
-    EXPECT_THROW(lists::index_of(list_of_ints->view(), *skey, FIND_FIRST), cudf::logic_error);
-    EXPECT_THROW(lists::index_of(list_of_ints->view(), *skey, FIND_LAST), cudf::logic_error);
+    EXPECT_THROW(cudf::lists::contains(list_of_ints->view(), *skey), cudf::logic_error);
+    EXPECT_THROW(cudf::lists::index_of(list_of_ints->view(), *skey, FIND_FIRST), cudf::logic_error);
+    EXPECT_THROW(cudf::lists::index_of(list_of_ints->view(), *skey, FIND_LAST), cudf::logic_error);
   }
 }
 
@@ -537,7 +534,7 @@ TYPED_TEST(TypedVectorContainsTest, VectorKeysWithNoNulls)
 {
   using T = TypeParam;
 
-  auto search_space = lists_column_wrapper<T, int32_t>{
+  auto search_space = cudf::test::lists_column_wrapper<T, int32_t>{
     {0, 1, 2, 1},
     {3, 4, 5},
     {6, 7, 8},
@@ -549,22 +546,23 @@ TYPED_TEST(TypedVectorContainsTest, VectorKeysWithNoNulls)
     {1, 2, 3, 3},
     {}}.release();
 
-  auto search_key = fixed_width_column_wrapper<T, int32_t>{1, 2, 3, 1, 2, 3, 1, 2, 3, 1};
+  auto search_key =
+    cudf::test::fixed_width_column_wrapper<T, int32_t>{1, 2, 3, 1, 2, 3, 1, 2, 3, 1};
   {
     // CONTAINS
-    auto result   = lists::contains(search_space->view(), search_key);
+    auto result   = cudf::lists::contains(search_space->view(), search_key);
     auto expected = bools_col{1, 0, 0, 1, 1, 0, 0, 0, 1, 0};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_FIRST
-    auto result   = lists::index_of(search_space->view(), search_key, FIND_FIRST);
+    auto result   = cudf::lists::index_of(search_space->view(), search_key, FIND_FIRST);
     auto expected = indices_col{1, ABSENT, ABSENT, 2, 0, ABSENT, ABSENT, ABSENT, 2, ABSENT};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_LAST
-    auto result   = lists::index_of(search_space->view(), search_key, FIND_LAST);
+    auto result   = cudf::lists::index_of(search_space->view(), search_key, FIND_LAST);
     auto expected = indices_col{3, ABSENT, ABSENT, 4, 0, ABSENT, ABSENT, ABSENT, 3, ABSENT};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
@@ -576,38 +574,39 @@ TYPED_TEST(TypedVectorContainsTest, VectorWithNullLists)
 
   using T = TypeParam;
 
-  auto search_space = lists_column_wrapper<T, int32_t>{{{0, 1, 2, 1},
-                                                        {3, 4, 5},
-                                                        {6, 7, 8},
-                                                        {},
-                                                        {9, 0, 1, 3, 1},
-                                                        {2, 3, 4},
-                                                        {5, 6, 7},
-                                                        {8, 9, 0},
-                                                        {},
-                                                        {1, 2, 3, 3},
-                                                        {}},
-                                                       nulls_at({3, 10})}
+  auto search_space = cudf::test::lists_column_wrapper<T, int32_t>{{{0, 1, 2, 1},
+                                                                    {3, 4, 5},
+                                                                    {6, 7, 8},
+                                                                    {},
+                                                                    {9, 0, 1, 3, 1},
+                                                                    {2, 3, 4},
+                                                                    {5, 6, 7},
+                                                                    {8, 9, 0},
+                                                                    {},
+                                                                    {1, 2, 3, 3},
+                                                                    {}},
+                                                                   nulls_at({3, 10})}
                         .release();
 
-  auto search_keys = fixed_width_column_wrapper<T, int32_t>{1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2};
+  auto search_keys =
+    cudf::test::fixed_width_column_wrapper<T, int32_t>{1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2};
 
   {
     // CONTAINS
-    auto result   = lists::contains(search_space->view(), search_keys);
+    auto result   = cudf::lists::contains(search_space->view(), search_keys);
     auto expected = bools_col{{1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0}, nulls_at({3, 10})};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_FIRST
-    auto result   = lists::index_of(search_space->view(), search_keys, FIND_FIRST);
+    auto result   = cudf::lists::index_of(search_space->view(), search_keys, FIND_FIRST);
     auto expected = indices_col{{1, ABSENT, ABSENT, X, ABSENT, 1, ABSENT, ABSENT, ABSENT, 0, X},
                                 nulls_at({3, 10})};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_LAST
-    auto result   = lists::index_of(search_space->view(), search_keys, FIND_LAST);
+    auto result   = cudf::lists::index_of(search_space->view(), search_keys, FIND_LAST);
     auto expected = indices_col{{3, ABSENT, ABSENT, X, ABSENT, 1, ABSENT, ABSENT, ABSENT, 0, X},
                                 nulls_at({3, 10})};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
@@ -619,28 +618,28 @@ TYPED_TEST(TypedVectorContainsTest, VectorNonNullListsWithNullValues)
   // Test List columns that have no NULL list rows, but NULL elements in some list rows.
   using T = TypeParam;
 
-  auto numerals = fixed_width_column_wrapper<T>{{X, 1, 2, X, 4, 5, X, 7, 8, X, X, 1, 2, X, 1},
-                                                nulls_at({0, 3, 6, 9, 10, 13})};
+  auto numerals = cudf::test::fixed_width_column_wrapper<T>{
+    {X, 1, 2, X, 4, 5, X, 7, 8, X, X, 1, 2, X, 1}, nulls_at({0, 3, 6, 9, 10, 13})};
 
-  auto search_space = make_lists_column(
+  auto search_space = cudf::make_lists_column(
     8, indices_col{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(), numerals.release(), 0, {});
   // Search space: [ [x], [1,2], [x,4,5,x], [], [], [7,8,x], [x], [1,2,x,1] ]
-  auto search_keys = fixed_width_column_wrapper<T, int32_t>{1, 2, 3, 1, 2, 3, 1, 1};
+  auto search_keys = cudf::test::fixed_width_column_wrapper<T, int32_t>{1, 2, 3, 1, 2, 3, 1, 1};
   {
     // CONTAINS
-    auto result   = lists::contains(search_space->view(), search_keys);
+    auto result   = cudf::lists::contains(search_space->view(), search_keys);
     auto expected = bools_col{0, 1, 0, 0, 0, 0, 0, 1};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_FIRST
-    auto result   = lists::index_of(search_space->view(), search_keys, FIND_FIRST);
+    auto result   = cudf::lists::index_of(search_space->view(), search_keys, FIND_FIRST);
     auto expected = indices_col{ABSENT, 1, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, 0};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_LAST
-    auto result   = lists::index_of(search_space->view(), search_keys, FIND_LAST);
+    auto result   = cudf::lists::index_of(search_space->view(), search_keys, FIND_LAST);
     auto expected = indices_col{ABSENT, 1, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, 3};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
@@ -650,12 +649,12 @@ TYPED_TEST(TypedVectorContainsTest, VectorWithNullsInLists)
 {
   using T = TypeParam;
 
-  auto numerals = fixed_width_column_wrapper<T>{{X, 1, 2, X, 4, 5, X, 7, 8, X, X, 1, 2, X, 1},
-                                                nulls_at({0, 3, 6, 9, 10, 13})};
+  auto numerals = cudf::test::fixed_width_column_wrapper<T>{
+    {X, 1, 2, X, 4, 5, X, 7, 8, X, X, 1, 2, X, 1}, nulls_at({0, 3, 6, 9, 10, 13})};
 
   auto input_null_mask_iter = null_at(4);
 
-  auto search_space = make_lists_column(
+  auto search_space = cudf::make_lists_column(
     8,
     indices_col{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(),
     numerals.release(),
@@ -663,22 +662,22 @@ TYPED_TEST(TypedVectorContainsTest, VectorWithNullsInLists)
     cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8));
   // Search space: [ [x], [1,2], [x,4,5,x], [], x, [7,8,x], [x], [1,2,x,1] ]
 
-  auto search_keys = fixed_width_column_wrapper<T, int32_t>{1, 2, 3, 1, 2, 3, 1, 1};
+  auto search_keys = cudf::test::fixed_width_column_wrapper<T, int32_t>{1, 2, 3, 1, 2, 3, 1, 1};
   {
     // CONTAINS
-    auto result   = lists::contains(search_space->view(), search_keys);
+    auto result   = cudf::lists::contains(search_space->view(), search_keys);
     auto expected = bools_col{{0, 1, 0, 0, X, 0, 0, 1}, null_at(4)};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_FIRST
-    auto result   = lists::index_of(search_space->view(), search_keys, FIND_FIRST);
+    auto result   = cudf::lists::index_of(search_space->view(), search_keys, FIND_FIRST);
     auto expected = indices_col{{ABSENT, 1, ABSENT, ABSENT, X, ABSENT, ABSENT, 0}, null_at(4)};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_LAST
-    auto result   = lists::index_of(search_space->view(), search_keys, FIND_LAST);
+    auto result   = cudf::lists::index_of(search_space->view(), search_keys, FIND_LAST);
     auto expected = indices_col{{ABSENT, 1, ABSENT, ABSENT, X, ABSENT, ABSENT, 3}, null_at(4)};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
@@ -688,12 +687,12 @@ TYPED_TEST(TypedVectorContainsTest, ListContainsVectorWithNullsInListsAndInSearc
 {
   using T = TypeParam;
 
-  auto numerals = fixed_width_column_wrapper<T>{{X, 1, 2, X, 4, 5, X, 7, 8, X, X, 1, 2, X, 1},
-                                                nulls_at({0, 3, 6, 9, 10, 13})};
+  auto numerals = cudf::test::fixed_width_column_wrapper<T>{
+    {X, 1, 2, X, 4, 5, X, 7, 8, X, X, 1, 2, X, 1}, nulls_at({0, 3, 6, 9, 10, 13})};
 
   auto input_null_mask_iter = null_at(4);
 
-  auto search_space = make_lists_column(
+  auto search_space = cudf::make_lists_column(
     8,
     indices_col{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(),
     numerals.release(),
@@ -701,22 +700,23 @@ TYPED_TEST(TypedVectorContainsTest, ListContainsVectorWithNullsInListsAndInSearc
     cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8));
   // Search space: [ [x], [1,2], [x,4,5,x], [], x, [7,8,x], [x], [1,2,x,1] ]
 
-  auto search_keys = fixed_width_column_wrapper<T, int32_t>{{1, 2, 3, X, 2, 3, 1, 1}, null_at(3)};
+  auto search_keys =
+    cudf::test::fixed_width_column_wrapper<T, int32_t>{{1, 2, 3, X, 2, 3, 1, 1}, null_at(3)};
   {
     // CONTAINS
-    auto result   = lists::contains(search_space->view(), search_keys);
+    auto result   = cudf::lists::contains(search_space->view(), search_keys);
     auto expected = bools_col{{0, 1, 0, X, X, 0, 0, 1}, nulls_at({3, 4})};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_FIRST
-    auto result   = lists::index_of(search_space->view(), search_keys, FIND_FIRST);
+    auto result   = cudf::lists::index_of(search_space->view(), search_keys, FIND_FIRST);
     auto expected = indices_col{{ABSENT, 1, ABSENT, X, X, ABSENT, ABSENT, 0}, nulls_at({3, 4})};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_LAST
-    auto result   = lists::index_of(search_space->view(), search_keys, FIND_LAST);
+    auto result   = cudf::lists::index_of(search_space->view(), search_keys, FIND_LAST);
     auto expected = indices_col{{ABSENT, 1, ABSENT, X, X, ABSENT, ABSENT, 3}, nulls_at({3, 4})};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
@@ -726,36 +726,37 @@ TEST_F(ContainsTest, BoolKeyVectorWithNullsInListsAndInSearchKeys)
 {
   using T = bool;
 
-  auto numerals = fixed_width_column_wrapper<T>{{X, 0, 1, X, 1, 1, X, 1, 1, X, X, 0, 1, X, 1},
-                                                nulls_at({0, 3, 6, 9, 10, 13})};
+  auto numerals = cudf::test::fixed_width_column_wrapper<T>{
+    {X, 0, 1, X, 1, 1, X, 1, 1, X, X, 0, 1, X, 1}, nulls_at({0, 3, 6, 9, 10, 13})};
 
   auto input_null_mask_iter = null_at(4);
 
-  auto search_space = make_lists_column(
+  auto search_space = cudf::make_lists_column(
     8,
     indices_col{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(),
     numerals.release(),
     1,
     cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8));
 
-  auto search_keys = fixed_width_column_wrapper<T, int32_t>{{0, 1, 0, X, 0, 0, 1, 1}, null_at(3)};
+  auto search_keys =
+    cudf::test::fixed_width_column_wrapper<T, int32_t>{{0, 1, 0, X, 0, 0, 1, 1}, null_at(3)};
   // Search space: [ [x], [0,1], [x,1,1,x], [], x, [1,1,x], [x], [0,1,x,1] ]
   // Search keys : [  0,   1,     0,         x, 0,  0,       1,   1        ]
   {
     // CONTAINS
-    auto result   = lists::contains(search_space->view(), search_keys);
+    auto result   = cudf::lists::contains(search_space->view(), search_keys);
     auto expected = bools_col{{0, 1, 0, X, X, 0, 0, 1}, nulls_at({3, 4})};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_FIRST
-    auto result   = lists::index_of(search_space->view(), search_keys, FIND_FIRST);
+    auto result   = cudf::lists::index_of(search_space->view(), search_keys, FIND_FIRST);
     auto expected = indices_col{{ABSENT, 1, ABSENT, X, X, ABSENT, ABSENT, 1}, nulls_at({3, 4})};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_LAST
-    auto result   = lists::index_of(search_space->view(), search_keys, FIND_LAST);
+    auto result   = cudf::lists::index_of(search_space->view(), search_keys, FIND_LAST);
     auto expected = indices_col{{ABSENT, 1, ABSENT, X, X, ABSENT, ABSENT, 3}, nulls_at({3, 4})};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
@@ -763,37 +764,38 @@ TEST_F(ContainsTest, BoolKeyVectorWithNullsInListsAndInSearchKeys)
 
 TEST_F(ContainsTest, StringKeyVectorWithNullsInListsAndInSearchKeys)
 {
-  auto strings = strings_column_wrapper{
+  auto strings = cudf::test::strings_column_wrapper{
     {"X", "1", "2", "X", "4", "5", "X", "7", "8", "X", "X", "1", "2", "X", "1"},
     nulls_at({0, 3, 6, 9, 10, 13})};
   auto input_null_mask_iter = null_at(4);
-  auto search_space         = make_lists_column(
+  auto search_space         = cudf::make_lists_column(
     8,
-    fixed_width_column_wrapper<size_type>{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(),
+    cudf::test::fixed_width_column_wrapper<cudf::size_type>{0, 1, 3, 7, 7, 7, 10, 11, 15}.release(),
     strings.release(),
     1,
     cudf::test::detail::make_null_mask(input_null_mask_iter, input_null_mask_iter + 8));
 
-  auto search_keys = strings_column_wrapper{{"1", "2", "3", "X", "2", "3", "1", "1"}, null_at(3)};
+  auto search_keys =
+    cudf::test::strings_column_wrapper{{"1", "2", "3", "X", "2", "3", "1", "1"}, null_at(3)};
 
   // Search space: [ [x], [1,2], [x,4,5,x], [], x, [7,8,x], [x], [1,2,x,1] ]
   // Search keys:  [  1,   2,     3,         X, 2,  3,       1,   1]
 
   {
     // CONTAINS
-    auto result   = lists::contains(search_space->view(), search_keys);
+    auto result   = cudf::lists::contains(search_space->view(), search_keys);
     auto expected = bools_col{{0, 1, 0, X, X, 0, 0, 1}, nulls_at({3, 4})};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_FIRST
-    auto result   = lists::index_of(search_space->view(), search_keys, FIND_FIRST);
+    auto result   = cudf::lists::index_of(search_space->view(), search_keys, FIND_FIRST);
     auto expected = indices_col{{ABSENT, 1, ABSENT, X, X, ABSENT, ABSENT, 0}, nulls_at({3, 4})};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_LAST
-    auto result   = lists::index_of(search_space->view(), search_keys, FIND_LAST);
+    auto result   = cudf::lists::index_of(search_space->view(), search_keys, FIND_LAST);
     auto expected = indices_col{{ABSENT, 1, ABSENT, X, X, ABSENT, ABSENT, 3}, nulls_at({3, 4})};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
@@ -803,36 +805,37 @@ TEST_F(ContainsTest, VectorTypeRelatedExceptions)
 {
   {
     // Nested types unsupported.
-    auto list_of_lists = lists_column_wrapper<int32_t>{
+    auto list_of_lists = cudf::test::lists_column_wrapper<int32_t>{
       {{1, 2, 3}, {4, 5, 6}},
       {{1, 2, 3}, {4, 5, 6}},
       {{1, 2, 3},
        {4, 5, 6}}}.release();
-    auto skey = fixed_width_column_wrapper<int32_t>{0, 1, 2};
-    EXPECT_THROW(lists::contains(list_of_lists->view(), skey), cudf::logic_error);
-    EXPECT_THROW(lists::index_of(list_of_lists->view(), skey, FIND_FIRST), cudf::logic_error);
-    EXPECT_THROW(lists::index_of(list_of_lists->view(), skey, FIND_LAST), cudf::logic_error);
+    auto skey = cudf::test::fixed_width_column_wrapper<int32_t>{0, 1, 2};
+    EXPECT_THROW(cudf::lists::contains(list_of_lists->view(), skey), cudf::logic_error);
+    EXPECT_THROW(cudf::lists::index_of(list_of_lists->view(), skey, FIND_FIRST), cudf::logic_error);
+    EXPECT_THROW(cudf::lists::index_of(list_of_lists->view(), skey, FIND_LAST), cudf::logic_error);
   }
   {
     // Search key must match list elements in type.
     auto list_of_ints =
-      lists_column_wrapper<int32_t>{
+      cudf::test::lists_column_wrapper<int32_t>{
         {0, 1, 2},
         {3, 4, 5},
       }
         .release();
-    auto skey = strings_column_wrapper{"Hello", "World"};
-    EXPECT_THROW(lists::contains(list_of_ints->view(), skey), cudf::logic_error);
-    EXPECT_THROW(lists::index_of(list_of_ints->view(), skey, FIND_FIRST), cudf::logic_error);
-    EXPECT_THROW(lists::index_of(list_of_ints->view(), skey, FIND_LAST), cudf::logic_error);
+    auto skey = cudf::test::strings_column_wrapper{"Hello", "World"};
+    EXPECT_THROW(cudf::lists::contains(list_of_ints->view(), skey), cudf::logic_error);
+    EXPECT_THROW(cudf::lists::index_of(list_of_ints->view(), skey, FIND_FIRST), cudf::logic_error);
+    EXPECT_THROW(cudf::lists::index_of(list_of_ints->view(), skey, FIND_LAST), cudf::logic_error);
   }
   {
     // Search key column size must match lists column size.
-    auto list_of_ints = lists_column_wrapper<int32_t>{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}}.release();
-    auto skey         = fixed_width_column_wrapper<int32_t>{0, 1, 2, 3};
-    EXPECT_THROW(lists::contains(list_of_ints->view(), skey), cudf::logic_error);
-    EXPECT_THROW(lists::index_of(list_of_ints->view(), skey, FIND_FIRST), cudf::logic_error);
-    EXPECT_THROW(lists::index_of(list_of_ints->view(), skey, FIND_LAST), cudf::logic_error);
+    auto list_of_ints =
+      cudf::test::lists_column_wrapper<int32_t>{{0, 1, 2}, {3, 4, 5}, {6, 7, 8}}.release();
+    auto skey = cudf::test::fixed_width_column_wrapper<int32_t>{0, 1, 2, 3};
+    EXPECT_THROW(cudf::lists::contains(list_of_ints->view(), skey), cudf::logic_error);
+    EXPECT_THROW(cudf::lists::index_of(list_of_ints->view(), skey, FIND_FIRST), cudf::logic_error);
+    EXPECT_THROW(cudf::lists::index_of(list_of_ints->view(), skey, FIND_LAST), cudf::logic_error);
   }
 }
 
@@ -840,7 +843,7 @@ template <typename T>
 struct TypedContainsNaNsTest : public ContainsTest {
 };
 
-TYPED_TEST_SUITE(TypedContainsNaNsTest, FloatingPointTypes);
+TYPED_TEST_SUITE(TypedContainsNaNsTest, cudf::test::FloatingPointTypes);
 
 namespace {
 template <typename T>
@@ -864,7 +867,7 @@ TYPED_TEST(TypedContainsNaNsTest, ListWithNaNsScalar)
   auto nan_2 = get_nan<T>("2");
   auto nan_3 = get_nan<T>("3");
 
-  auto search_space = lists_column_wrapper<T>{
+  auto search_space = cudf::test::lists_column_wrapper<T>{
     {0.0, 1.0, 2.0},
     {3, 4, 5},
     {6, 7, 8},
@@ -879,20 +882,20 @@ TYPED_TEST(TypedContainsNaNsTest, ListWithNaNsScalar)
   auto search_key_nan = create_scalar_search_key<T>(nan_3);
   {
     // CONTAINS
-    auto result   = lists::contains(search_space->view(), *search_key_nan);
+    auto result   = cudf::lists::contains(search_space->view(), *search_key_nan);
     auto expected = bools_col{0, 0, 0, 0, 1, 0, 1, 0, 0, 0};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_FIRST
-    auto result = lists::index_of(search_space->view(), *search_key_nan, FIND_FIRST);
+    auto result = cudf::lists::index_of(search_space->view(), *search_key_nan, FIND_FIRST);
     auto expected =
       indices_col{ABSENT, ABSENT, ABSENT, ABSENT, 0, ABSENT, 1, ABSENT, ABSENT, ABSENT};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_LAST
-    auto result = lists::index_of(search_space->view(), *search_key_nan, FIND_LAST);
+    auto result = cudf::lists::index_of(search_space->view(), *search_key_nan, FIND_LAST);
     auto expected =
       indices_col{ABSENT, ABSENT, ABSENT, ABSENT, 0, ABSENT, 1, ABSENT, ABSENT, ABSENT};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
@@ -916,7 +919,7 @@ TYPED_TEST(TypedContainsNaNsTest, ListWithNaNsContainsVector)
   auto nan_2 = get_nan<T>("2");
   auto nan_3 = get_nan<T>("3");
 
-  auto search_space = lists_column_wrapper<T>{
+  auto search_space = cudf::test::lists_column_wrapper<T>{
     {0.0, 1.0, 2.0},
     {{3, 4, 5}, null_at(2)},  // i.e. {3, 4, ∅}.
     {6, 7, 8},
@@ -933,25 +936,26 @@ TYPED_TEST(TypedContainsNaNsTest, ListWithNaNsContainsVector)
   {
     // With nulls in the search key rows. (At index 2.)
     auto search_keys =
-      fixed_width_column_wrapper<T>{search_key_values.begin(), search_key_values.end(), null_at(2)}
+      cudf::test::fixed_width_column_wrapper<T>{
+        search_key_values.begin(), search_key_values.end(), null_at(2)}
         .release();
 
     {
       // CONTAINS
-      auto result   = lists::contains(search_space->view(), search_keys->view());
+      auto result   = cudf::lists::contains(search_space->view(), search_keys->view());
       auto expected = bools_col{{1, 0, 0, 0, 1, 0, 1, 0, 1, 0}, null_at(2)};
       CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
     }
     {
       // FIND_FIRST
-      auto result = lists::index_of(search_space->view(), search_keys->view(), FIND_FIRST);
+      auto result = cudf::lists::index_of(search_space->view(), search_keys->view(), FIND_FIRST);
       auto expected =
         indices_col{{1, ABSENT, X, ABSENT, 0, ABSENT, 2, ABSENT, 1, ABSENT}, nulls_at({2})};
       CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
     }
     {
       // FIND_LAST
-      auto result = lists::index_of(search_space->view(), search_keys->view(), FIND_LAST);
+      auto result = cudf::lists::index_of(search_space->view(), search_keys->view(), FIND_LAST);
       auto expected =
         indices_col{{1, ABSENT, X, ABSENT, 0, ABSENT, 2, ABSENT, 1, ABSENT}, nulls_at({2})};
       CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
@@ -960,22 +964,23 @@ TYPED_TEST(TypedContainsNaNsTest, ListWithNaNsContainsVector)
   {
     // No nulls in the search key rows.
     auto search_keys =
-      fixed_width_column_wrapper<T>(search_key_values.begin(), search_key_values.end()).release();
+      cudf::test::fixed_width_column_wrapper<T>(search_key_values.begin(), search_key_values.end())
+        .release();
     {
       // CONTAINS
-      auto result   = lists::contains(search_space->view(), search_keys->view());
+      auto result   = cudf::lists::contains(search_space->view(), search_keys->view());
       auto expected = bools_col{1, 0, 0, 0, 1, 0, 1, 0, 1, 0};
       CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
     }
     {
       // FIND_FIRST
-      auto result   = lists::index_of(search_space->view(), search_keys->view(), FIND_FIRST);
+      auto result   = cudf::lists::index_of(search_space->view(), search_keys->view(), FIND_FIRST);
       auto expected = indices_col{1, ABSENT, ABSENT, ABSENT, 0, ABSENT, 2, ABSENT, 1, ABSENT};
       CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
     }
     {
       // FIND_LAST
-      auto result   = lists::index_of(search_space->view(), search_keys->view(), FIND_LAST);
+      auto result   = cudf::lists::index_of(search_space->view(), search_keys->view(), FIND_LAST);
       auto expected = indices_col{1, ABSENT, ABSENT, ABSENT, 0, ABSENT, 2, ABSENT, 1, ABSENT};
       CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
     }
@@ -986,7 +991,7 @@ template <typename T>
 struct TypedContainsDecimalsTest : public ContainsTest {
 };
 
-TYPED_TEST_SUITE(TypedContainsDecimalsTest, FixedPointTypes);
+TYPED_TEST_SUITE(TypedContainsDecimalsTest, cudf::test::FixedPointTypes);
 
 TYPED_TEST(TypedContainsDecimalsTest, ScalarKey)
 {
@@ -995,29 +1000,30 @@ TYPED_TEST(TypedContainsDecimalsTest, ScalarKey)
   auto const search_space = [] {
     auto const values = std::vector<typename T::rep>{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1,
                                                      2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3};
-    auto decimals     = fixed_point_column_wrapper<typename T::rep>{
+    auto decimals     = cudf::test::fixed_point_column_wrapper<typename T::rep>{
       values.begin(), values.end(), numeric::scale_type{0}};
     auto list_offsets = indices_col{0, 3, 6, 9, 12, 15, 18, 21, 21, 24, 24};
-    return make_lists_column(10, list_offsets.release(), decimals.release(), 0, {});
+    return cudf::make_lists_column(10, list_offsets.release(), decimals.release(), 0, {});
   }();
-  auto search_key_one = make_fixed_point_scalar<T>(typename T::rep{1}, numeric::scale_type{0});
+  auto search_key_one =
+    cudf::make_fixed_point_scalar<T>(typename T::rep{1}, numeric::scale_type{0});
 
   // Search space: [[0,1,2], [3,4,5], [6,7,8], [9,0,1], [2,3,4], [5,6,7], [8,9,0], [], [1,2,3], []]
   {
     // CONTAINS
-    auto result   = lists::contains(search_space->view(), *search_key_one);
+    auto result   = cudf::lists::contains(search_space->view(), *search_key_one);
     auto expected = bools_col{1, 0, 0, 1, 0, 0, 0, 0, 1, 0};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_FIRST
-    auto result   = lists::index_of(search_space->view(), *search_key_one, FIND_FIRST);
+    auto result   = cudf::lists::index_of(search_space->view(), *search_key_one, FIND_FIRST);
     auto expected = indices_col{1, ABSENT, ABSENT, 2, ABSENT, ABSENT, ABSENT, ABSENT, 0, ABSENT};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_LAST
-    auto result   = lists::index_of(search_space->view(), *search_key_one, FIND_LAST);
+    auto result   = cudf::lists::index_of(search_space->view(), *search_key_one, FIND_LAST);
     auto expected = indices_col{1, ABSENT, ABSENT, 2, ABSENT, ABSENT, ABSENT, ABSENT, 0, ABSENT};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
@@ -1030,13 +1036,13 @@ TYPED_TEST(TypedContainsDecimalsTest, VectorKey)
   auto const search_space = [] {
     auto const values = std::vector<typename T::rep>{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1,
                                                      2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3};
-    auto decimals     = fixed_point_column_wrapper<typename T::rep>{
+    auto decimals     = cudf::test::fixed_point_column_wrapper<typename T::rep>{
       values.begin(), values.end(), numeric::scale_type{0}};
     auto list_offsets = indices_col{0, 3, 6, 9, 12, 15, 18, 21, 21, 24, 24};
-    return make_lists_column(10, list_offsets.release(), decimals.release(), 0, {});
+    return cudf::make_lists_column(10, list_offsets.release(), decimals.release(), 0, {});
   }();
 
-  auto search_key = fixed_point_column_wrapper<typename T::rep>{
+  auto search_key = cudf::test::fixed_point_column_wrapper<typename T::rep>{
     {1, 2, 3, 1, 2, 3, 1, 2, 3, 1},
     numeric::scale_type{
       0}}.release();
@@ -1045,19 +1051,19 @@ TYPED_TEST(TypedContainsDecimalsTest, VectorKey)
   // ] Search keys:  [  1,       2,       3,       1,       2,       3,       1,       2,  3, 1 ]
   {
     // CONTAINS
-    auto result   = lists::contains(search_space->view(), search_key->view());
+    auto result   = cudf::lists::contains(search_space->view(), search_key->view());
     auto expected = bools_col{1, 0, 0, 1, 1, 0, 0, 0, 1, 0};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_FIRST
-    auto result   = lists::index_of(search_space->view(), search_key->view(), FIND_FIRST);
+    auto result   = cudf::lists::index_of(search_space->view(), search_key->view(), FIND_FIRST);
     auto expected = indices_col{1, ABSENT, ABSENT, 2, 0, ABSENT, ABSENT, ABSENT, 2, ABSENT};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_LAST
-    auto result   = lists::index_of(search_space->view(), search_key->view(), FIND_LAST);
+    auto result   = cudf::lists::index_of(search_space->view(), search_key->view(), FIND_LAST);
     auto expected = indices_col{1, ABSENT, ABSENT, 2, 0, ABSENT, ABSENT, ABSENT, 2, ABSENT};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
@@ -1075,8 +1081,8 @@ TYPED_TEST(TypedStructContainsTest, EmptyInputTest)
   auto const lists = [] {
     auto offsets = indices_col{};
     auto data    = tdata_col{};
-    auto child   = structs_col{{data}};
-    return make_lists_column(0, offsets.release(), child.release(), 0, {});
+    auto child   = cudf::test::structs_column_wrapper{{data}};
+    return cudf::make_lists_column(0, offsets.release(), child.release(), 0, {});
   }();
 
   auto const scalar_key = [] {
@@ -1085,11 +1091,11 @@ TYPED_TEST(TypedStructContainsTest, EmptyInputTest)
   }();
   auto const column_key = [] {
     auto child = tdata_col{};
-    return structs_col{{child}};
+    return cudf::test::structs_column_wrapper{{child}};
   }();
 
-  auto const result1  = lists::contains(lists->view(), scalar_key);
-  auto const result2  = lists::contains(lists->view(), column_key);
+  auto const result1  = cudf::lists::contains(lists->view(), scalar_key);
+  auto const result2  = cudf::lists::contains(lists->view(), column_key);
   auto const expected = bools_col{};
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result1);
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result2);
@@ -1121,8 +1127,8 @@ TYPED_TEST(TypedStructContainsTest, ScalarKeyNoNullLists)
                               1, 0, 1, 1
     };
     // clang-format on
-    auto child = structs_col{{data1, data2}};
-    return make_lists_column(10, offsets.release(), child.release(), 0, {});
+    auto child = cudf::test::structs_column_wrapper{{data1, data2}};
+    return cudf::make_lists_column(10, offsets.release(), child.release(), 0, {});
   }();
 
   auto const key = [] {
@@ -1133,26 +1139,26 @@ TYPED_TEST(TypedStructContainsTest, ScalarKeyNoNullLists)
 
   {
     // CONTAINS
-    auto const result   = lists::contains(lists->view(), key);
+    auto const result   = cudf::lists::contains(lists->view(), key);
     auto const expected = bools_col{1, 0, 0, 0, 0, 0, 0, 0, 1, 0};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // CONTAINS NULLS
-    auto const result   = lists::contains_nulls(lists->view());
+    auto const result   = cudf::lists::contains_nulls(lists->view());
     auto const expected = bools_col{0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_FIRST
-    auto const result = lists::index_of(lists->view(), key, FIND_FIRST);
+    auto const result = cudf::lists::index_of(lists->view(), key, FIND_FIRST);
     auto const expected =
       indices_col{1, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, 0, ABSENT};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_LAST
-    auto const result = lists::index_of(lists->view(), key, FIND_LAST);
+    auto const result = cudf::lists::index_of(lists->view(), key, FIND_LAST);
     auto const expected =
       indices_col{1, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, 2, ABSENT};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
@@ -1185,13 +1191,14 @@ TYPED_TEST(TypedStructContainsTest, ScalarKeyWithNullLists)
                               1, 0, 1, 1
     };
     // clang-format on
-    auto child               = structs_col{{data1, data2}};
+    auto child               = cudf::test::structs_column_wrapper{{data1, data2}};
     auto const validity_iter = nulls_at({3, 10});
-    return make_lists_column(11,
-                             offsets.release(),
-                             child.release(),
-                             2,
-                             detail::make_null_mask(validity_iter, validity_iter + 11));
+    return cudf::make_lists_column(
+      11,
+      offsets.release(),
+      child.release(),
+      2,
+      cudf::test::detail::make_null_mask(validity_iter, validity_iter + 11));
   }();
 
   auto const key = [] {
@@ -1202,26 +1209,26 @@ TYPED_TEST(TypedStructContainsTest, ScalarKeyWithNullLists)
 
   {
     // CONTAINS
-    auto const result   = lists::contains(lists->view(), key);
+    auto const result   = cudf::lists::contains(lists->view(), key);
     auto const expected = bools_col{{1, 0, 0, X, 0, 0, 0, 0, 0, 1, X}, nulls_at({3, 10})};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // CONTAINS NULLS
-    auto const result   = lists::contains_nulls(lists->view());
+    auto const result   = cudf::lists::contains_nulls(lists->view());
     auto const expected = bools_col{{0, 0, 0, X, 0, 0, 0, 0, 0, 0, X}, nulls_at({3, 10})};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_FIRST
-    auto const result   = lists::index_of(lists->view(), key, FIND_FIRST);
+    auto const result   = cudf::lists::index_of(lists->view(), key, FIND_FIRST);
     auto const expected = indices_col{
       {1, ABSENT, ABSENT, X, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, 0, X}, nulls_at({3, 10})};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_LAST
-    auto const result   = lists::index_of(lists->view(), key, FIND_LAST);
+    auto const result   = cudf::lists::index_of(lists->view(), key, FIND_LAST);
     auto const expected = indices_col{
       {1, ABSENT, ABSENT, X, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, 2, X}, nulls_at({3, 10})};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
@@ -1254,8 +1261,8 @@ TYPED_TEST(TypedStructContainsTest, SlicedListsColumnNoNulls)
                               1, 0, 1, 1
     };
     // clang-format on
-    auto child = structs_col{{data1, data2}};
-    return make_lists_column(10, offsets.release(), child.release(), 0, {});
+    auto child = cudf::test::structs_column_wrapper{{data1, data2}};
+    return cudf::make_lists_column(10, offsets.release(), child.release(), 0, {});
   }();
   auto const lists = cudf::slice(lists_original->view(), {3, 10})[0];
 
@@ -1267,25 +1274,25 @@ TYPED_TEST(TypedStructContainsTest, SlicedListsColumnNoNulls)
 
   {
     // CONTAINS
-    auto const result   = lists::contains(lists, key);
+    auto const result   = cudf::lists::contains(lists, key);
     auto const expected = bools_col{0, 0, 0, 0, 0, 1, 0};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // CONTAINS NULLS
-    auto const result   = lists::contains_nulls(lists);
+    auto const result   = cudf::lists::contains_nulls(lists);
     auto const expected = bools_col{0, 0, 0, 0, 0, 0, 0};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_FIRST
-    auto const result   = lists::index_of(lists, key, FIND_FIRST);
+    auto const result   = cudf::lists::index_of(lists, key, FIND_FIRST);
     auto const expected = indices_col{ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, 0, ABSENT};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_LAST
-    auto const result   = lists::index_of(lists, key, FIND_LAST);
+    auto const result   = cudf::lists::index_of(lists, key, FIND_LAST);
     auto const expected = indices_col{ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, 2, ABSENT};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
@@ -1317,8 +1324,8 @@ TYPED_TEST(TypedStructContainsTest, ScalarKeyNoNullListsWithNullStructs)
                               X, 0, 1, 1
     };
     // clang-format on
-    auto child = structs_col{{data1, data2}, nulls_at({1, 10, 15, 24})};
-    return make_lists_column(10, offsets.release(), child.release(), 0, {});
+    auto child = cudf::test::structs_column_wrapper{{data1, data2}, nulls_at({1, 10, 15, 24})};
+    return cudf::make_lists_column(10, offsets.release(), child.release(), 0, {});
   }();
 
   auto const key = [] {
@@ -1329,26 +1336,26 @@ TYPED_TEST(TypedStructContainsTest, ScalarKeyNoNullListsWithNullStructs)
 
   {
     // CONTAINS
-    auto const result   = lists::contains(lists->view(), key);
+    auto const result   = cudf::lists::contains(lists->view(), key);
     auto const expected = bools_col{1, 0, 0, 0, 0, 0, 0, 0, 1, 0};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // CONTAINS NULLS
-    auto const result   = lists::contains_nulls(lists->view());
+    auto const result   = cudf::lists::contains_nulls(lists->view());
     auto const expected = bools_col{1, 0, 0, 1, 1, 0, 0, 0, 1, 0};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_FIRST
-    auto const result = lists::index_of(lists->view(), key, FIND_FIRST);
+    auto const result = cudf::lists::index_of(lists->view(), key, FIND_FIRST);
     auto const expected =
       indices_col{3, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, 2, ABSENT};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_LAST
-    auto const result = lists::index_of(lists->view(), key, FIND_LAST);
+    auto const result = cudf::lists::index_of(lists->view(), key, FIND_LAST);
     auto const expected =
       indices_col{3, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, 2, ABSENT};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
@@ -1381,32 +1388,32 @@ TYPED_TEST(TypedStructContainsTest, ColumnKeyNoNullLists)
                               1, 0, 1, 1
     };
     // clang-format on
-    auto child = structs_col{{data1, data2}};
-    return make_lists_column(10, offsets.release(), child.release(), 0, {});
+    auto child = cudf::test::structs_column_wrapper{{data1, data2}};
+    return cudf::make_lists_column(10, offsets.release(), child.release(), 0, {});
   }();
 
   auto const keys = [] {
     auto child1 = tdata_col{1, 3, 1, 1, 2, 1, 0, 0, 1, 0};
     auto child2 = tdata_col{1, 0, 1, 1, 2, 1, 0, 0, 1, 0};
-    return structs_col{{child1, child2}};
+    return cudf::test::structs_column_wrapper{{child1, child2}};
   }();
 
   {
     // CONTAINS
-    auto const result   = lists::contains(lists->view(), keys);
+    auto const result   = cudf::lists::contains(lists->view(), keys);
     auto const expected = bools_col{1, 1, 0, 0, 0, 0, 0, 0, 1, 0};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_FIRST
-    auto const result = lists::index_of(lists->view(), keys, FIND_FIRST);
+    auto const result = cudf::lists::index_of(lists->view(), keys, FIND_FIRST);
     auto const expected =
       indices_col{1, 0, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, 0, ABSENT};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_LAST
-    auto const result = lists::index_of(lists->view(), keys, FIND_LAST);
+    auto const result = cudf::lists::index_of(lists->view(), keys, FIND_LAST);
     auto const expected =
       indices_col{1, 2, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, ABSENT, 2, ABSENT};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
@@ -1439,14 +1446,14 @@ TYPED_TEST(TypedStructContainsTest, ColumnKeyWithSlicedListsNoNulls)
                               1, 0, 1, 1
     };
     // clang-format on
-    auto child = structs_col{{data1, data2}};
-    return make_lists_column(10, offsets.release(), child.release(), 0, {});
+    auto child = cudf::test::structs_column_wrapper{{data1, data2}};
+    return cudf::make_lists_column(10, offsets.release(), child.release(), 0, {});
   }();
 
   auto const keys_original = [] {
     auto child1 = tdata_col{1, 9, 1, 6, 2, 1, 0, 0, 1, 0};
     auto child2 = tdata_col{1, 1, 1, 1, 2, 1, 0, 0, 1, 0};
-    return structs_col{{child1, child2}};
+    return cudf::test::structs_column_wrapper{{child1, child2}};
   }();
 
   auto const lists = cudf::slice(lists_original->view(), {3, 7})[0];
@@ -1454,19 +1461,19 @@ TYPED_TEST(TypedStructContainsTest, ColumnKeyWithSlicedListsNoNulls)
 
   {
     // CONTAINS
-    auto const result   = lists::contains(lists, keys);
+    auto const result   = cudf::lists::contains(lists, keys);
     auto const expected = bools_col{1, 0, 1, 0};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_FIRST
-    auto const result   = lists::index_of(lists, keys, FIND_FIRST);
+    auto const result   = cudf::lists::index_of(lists, keys, FIND_FIRST);
     auto const expected = indices_col{0, ABSENT, 1, ABSENT};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_LAST
-    auto const result   = lists::index_of(lists, keys, FIND_LAST);
+    auto const result   = cudf::lists::index_of(lists, keys, FIND_LAST);
     auto const expected = indices_col{0, ABSENT, 1, ABSENT};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
@@ -1498,19 +1505,20 @@ TYPED_TEST(TypedStructContainsTest, ColumnKeyWithSlicedListsHavingNulls)
                               X, 0, 1, 1
     };
     // clang-format on
-    auto child               = structs_col{{data1, data2}, nulls_at({1, 10, 15, 24})};
+    auto child = cudf::test::structs_column_wrapper{{data1, data2}, nulls_at({1, 10, 15, 24})};
     auto const validity_iter = nulls_at({3, 10});
-    return make_lists_column(11,
-                             offsets.release(),
-                             child.release(),
-                             2,
-                             detail::make_null_mask(validity_iter, validity_iter + 11));
+    return cudf::make_lists_column(
+      11,
+      offsets.release(),
+      child.release(),
+      2,
+      cudf::test::detail::make_null_mask(validity_iter, validity_iter + 11));
   }();
 
   auto const keys_original = [] {
     auto child1 = tdata_col{{1, X, 1, 6, X, 1, 0, 0, 1, 0, 1}, null_at(4)};
     auto child2 = tdata_col{{1, X, 1, 1, X, 1, 0, 0, 1, 0, 1}, null_at(4)};
-    return structs_col{{child1, child2}, null_at(1)};
+    return cudf::test::structs_column_wrapper{{child1, child2}, null_at(1)};
   }();
 
   auto const lists = cudf::slice(lists_original->view(), {4, 8})[0];
@@ -1518,19 +1526,19 @@ TYPED_TEST(TypedStructContainsTest, ColumnKeyWithSlicedListsHavingNulls)
 
   {
     // CONTAINS
-    auto const result   = lists::contains(lists, keys);
+    auto const result   = cudf::lists::contains(lists, keys);
     auto const expected = bools_col{{X, 0, 1, 0}, null_at(0)};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_FIRST
-    auto const result   = lists::index_of(lists, keys, FIND_FIRST);
+    auto const result   = cudf::lists::index_of(lists, keys, FIND_FIRST);
     auto const expected = indices_col{{X, ABSENT, 1, ABSENT}, null_at(0)};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_LAST
-    auto const result   = lists::index_of(lists, keys, FIND_LAST);
+    auto const result   = cudf::lists::index_of(lists, keys, FIND_LAST);
     auto const expected = indices_col{{X, ABSENT, 2, ABSENT}, null_at(0)};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
@@ -1572,31 +1580,31 @@ TYPED_TEST(TypedListContainsTest, ScalarKeyLists)
 
   auto const key = [] {
     auto const child = tdata_col{0, 1, 2};
-    return list_scalar(child);
+    return cudf::list_scalar(child);
   }();
 
   auto const do_test = [&](auto const& lists, bool has_nulls) {
     {
       // CONTAINS
-      auto const result   = lists::contains(lists_column_view{lists}, key);
+      auto const result   = cudf::lists::contains(cudf::lists_column_view{lists}, key);
       auto const expected = bools_col{1, 0, 0};
       CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
     }
     {
       // CONTAINS NULLS
-      auto const result   = lists::contains_nulls(lists_column_view{lists});
+      auto const result   = cudf::lists::contains_nulls(cudf::lists_column_view{lists});
       auto const expected = has_nulls ? bools_col{1, 1, 0} : bools_col{0, 0, 0};
       CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
     }
     {
       // FIND_FIRST
-      auto const result   = lists::index_of(lists_column_view{lists}, key, FIND_FIRST);
+      auto const result   = cudf::lists::index_of(cudf::lists_column_view{lists}, key, FIND_FIRST);
       auto const expected = indices_col{0, ABSENT, ABSENT};
       CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
     }
     {
       // FIND_LAST
-      auto const result   = lists::index_of(lists_column_view{lists}, key, FIND_LAST);
+      auto const result   = cudf::lists::index_of(cudf::lists_column_view{lists}, key, FIND_LAST);
       auto const expected = indices_col{2, ABSENT, ABSENT};
       CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
     }
@@ -1664,31 +1672,31 @@ TYPED_TEST(TypedListContainsTest, SlicedListsColumn)
 
   auto const key = [] {
     auto const child = tdata_col{0, 1, 2};
-    return list_scalar(child);
+    return cudf::list_scalar(child);
   }();
 
   auto const do_test = [&](auto const& lists, bool has_nulls) {
     {
       // CONTAINS
-      auto const result   = lists::contains(lists_column_view{lists}, key);
+      auto const result   = cudf::lists::contains(cudf::lists_column_view{lists}, key);
       auto const expected = bools_col{1, 0, 0};
       CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
     }
     {
       // CONTAINS NULLS
-      auto const result   = lists::contains_nulls(lists_column_view{lists});
+      auto const result   = cudf::lists::contains_nulls(cudf::lists_column_view{lists});
       auto const expected = has_nulls ? bools_col{1, 1, 0} : bools_col{0, 0, 0};
       CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
     }
     {
       // FIND_FIRST
-      auto const result   = lists::index_of(lists_column_view{lists}, key, FIND_FIRST);
+      auto const result   = cudf::lists::index_of(cudf::lists_column_view{lists}, key, FIND_FIRST);
       auto const expected = indices_col{0, ABSENT, ABSENT};
       CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
     }
     {
       // FIND_LAST
-      auto const result   = lists::index_of(lists_column_view{lists}, key, FIND_LAST);
+      auto const result   = cudf::lists::index_of(cudf::lists_column_view{lists}, key, FIND_LAST);
       auto const expected = indices_col{2, ABSENT, ABSENT};
       CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
     }
@@ -1737,25 +1745,25 @@ TYPED_TEST(TypedListContainsTest, ColumnKeyLists)
   auto const do_test = [&](auto const& lists, bool has_nulls) {
     {
       // CONTAINS
-      auto const result   = lists::contains(lists_column_view{lists}, key);
+      auto const result   = cudf::lists::contains(cudf::lists_column_view{lists}, key);
       auto const expected = has_nulls ? bools_col{1, 1, 1} : bools_col{0, 1, 0};
       CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
     }
     {
       // CONTAINS NULLS
-      auto const result   = lists::contains_nulls(lists_column_view{lists});
+      auto const result   = cudf::lists::contains_nulls(cudf::lists_column_view{lists});
       auto const expected = has_nulls ? bools_col{1, 1, 0} : bools_col{0, 0, 0};
       CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
     }
     {
       // FIND_FIRST
-      auto const result   = lists::index_of(lists_column_view{lists}, key, FIND_FIRST);
+      auto const result   = cudf::lists::index_of(cudf::lists_column_view{lists}, key, FIND_FIRST);
       auto const expected = has_nulls ? indices_col{0, 2, 1} : indices_col{ABSENT, 0, ABSENT};
       CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
     }
     {
       // FIND_LAST
-      auto const result   = lists::index_of(lists_column_view{lists}, key, FIND_LAST);
+      auto const result   = cudf::lists::index_of(cudf::lists_column_view{lists}, key, FIND_LAST);
       auto const expected = has_nulls ? indices_col{2, 2, 1} : indices_col{ABSENT, 2, ABSENT};
       CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
     }
@@ -1791,40 +1799,38 @@ TYPED_TEST(TypedListContainsTest, ColumnKeyWithListsOfStructsNoNulls)
 
     };
     // clang-format on
-    auto structs = structs_col{{data1, data2}};
-    auto child   = make_lists_column(8, child_offsets.release(), structs.release(), 0, {});
+    auto structs = cudf::test::structs_column_wrapper{{data1, data2}};
+    auto child   = cudf::make_lists_column(8, child_offsets.release(), structs.release(), 0, {});
 
     auto offsets = indices_col{0, 4, 8};
-    return make_lists_column(2, offsets.release(), std::move(child), 0, {});
+    return cudf::make_lists_column(2, offsets.release(), std::move(child), 0, {});
   }();
 
   auto const key = [] {
     auto data1       = tdata_col{0, 0, 2};
     auto data2       = tdata_col{10, 10, 12};
-    auto const child = structs_col{{data1, data2}};
-    return list_scalar(child);
+    auto const child = cudf::test::structs_column_wrapper{{data1, data2}};
+    return cudf::list_scalar(child);
   }();
 
   {
     // CONTAINS
-    auto const result   = lists::contains(lists_column_view{lists->view()}, key);
+    auto const result   = cudf::lists::contains(cudf::lists_column_view{lists->view()}, key);
     auto const expected = bools_col{1, 1};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_FIRST
-    auto const result   = lists::index_of(lists_column_view{lists->view()}, key, FIND_FIRST);
+    auto const result =
+      cudf::lists::index_of(cudf::lists_column_view{lists->view()}, key, FIND_FIRST);
     auto const expected = indices_col{0, 1};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
   {
     // FIND_LAST
-    auto const result   = lists::index_of(lists_column_view{lists->view()}, key, FIND_LAST);
+    auto const result =
+      cudf::lists::index_of(cudf::lists_column_view{lists->view()}, key, FIND_LAST);
     auto const expected = indices_col{2, 1};
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(expected, *result);
   }
 }
-
-}  // namespace test
-
-}  // namespace cudf
diff --git a/cpp/tests/structs/utilities_tests.cpp b/cpp/tests/structs/utilities_tests.cpp
index d58568cd1b5..d56b2160ca3 100644
--- a/cpp/tests/structs/utilities_tests.cpp
+++ b/cpp/tests/structs/utilities_tests.cpp
@@ -28,90 +28,87 @@
 #include <cudf/detail/structs/utilities.hpp>
 #include <cudf/null_mask.hpp>
 
-namespace cudf::test {
-
-using namespace cudf;
-using namespace iterators;
-using namespace cudf::structs::detail;
-using strings    = strings_column_wrapper;
-using dictionary = dictionary_column_wrapper<std::string>;
-using structs    = structs_column_wrapper;
-using bools      = fixed_width_column_wrapper<bool>;
-
 template <typename T>
-using nums = fixed_width_column_wrapper<T, int32_t>;
+using nums = cudf::test::fixed_width_column_wrapper<T, int32_t>;
 
 template <typename T>
-using lists = lists_column_wrapper<T, int32_t>;
+using lists = cudf::test::lists_column_wrapper<T, int32_t>;
 
-struct StructUtilitiesTest : BaseFixture {
+struct StructUtilitiesTest : cudf::test::BaseFixture {
 };
 
 template <typename T>
 struct TypedStructUtilitiesTest : StructUtilitiesTest {
 };
 
-TYPED_TEST_SUITE(TypedStructUtilitiesTest, FixedWidthTypes);
+TYPED_TEST_SUITE(TypedStructUtilitiesTest, cudf::test::FixedWidthTypes);
 
 TYPED_TEST(TypedStructUtilitiesTest, ListsAtTopLevel)
 {
   using T     = TypeParam;
-  using lists = lists_column_wrapper<T, int32_t>;
-  using nums  = fixed_width_column_wrapper<T, int32_t>;
+  using lists = cudf::test::lists_column_wrapper<T, int32_t>;
+  using nums  = cudf::test::fixed_width_column_wrapper<T, int32_t>;
 
   auto lists_col = lists{{0, 1}, {22, 33}, {44, 55, 66}};
-  auto nums_col  = nums{{0, 1, 2}, null_at(6)};
+  auto nums_col  = nums{{0, 1, 2}, cudf::test::iterators::null_at(6)};
 
   auto table = cudf::table_view{{lists_col, nums_col}};
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(table,
-                                flatten_nested_columns(table, {}, {}, column_nullability::FORCE));
+                                cudf::structs::detail::flatten_nested_columns(
+                                  table, {}, {}, cudf::structs::detail::column_nullability::FORCE));
 }
 
 TYPED_TEST(TypedStructUtilitiesTest, NestedListsUnsupported)
 {
   using T     = TypeParam;
-  using lists = lists_column_wrapper<T, int32_t>;
-  using nums  = fixed_width_column_wrapper<T, int32_t>;
+  using lists = cudf::test::lists_column_wrapper<T, int32_t>;
+  using nums  = cudf::test::fixed_width_column_wrapper<T, int32_t>;
 
   auto lists_member = lists{{0, 1}, {22, 33}, {44, 55, 66}};
-  auto nums_member  = nums{{0, 1, 2}, null_at(6)};
-  auto structs_col  = structs{{nums_member, lists_member}};
-  auto nums_col     = nums{{0, 1, 2}, null_at(6)};
-
-  EXPECT_THROW(flatten_nested_columns(
-                 cudf::table_view{{nums_col, structs_col}}, {}, {}, column_nullability::FORCE),
-               cudf::logic_error);
+  auto nums_member  = nums{{0, 1, 2}, cudf::test::iterators::null_at(6)};
+  auto structs_col  = cudf::test::structs_column_wrapper{{nums_member, lists_member}};
+  auto nums_col     = nums{{0, 1, 2}, cudf::test::iterators::null_at(6)};
+
+  EXPECT_THROW(
+    cudf::structs::detail::flatten_nested_columns(cudf::table_view{{nums_col, structs_col}},
+                                                  {},
+                                                  {},
+                                                  cudf::structs::detail::column_nullability::FORCE),
+    cudf::logic_error);
 }
 
 TYPED_TEST(TypedStructUtilitiesTest, NoStructs)
 {
   using T    = TypeParam;
-  using nums = fixed_width_column_wrapper<T, int32_t>;
+  using nums = cudf::test::fixed_width_column_wrapper<T, int32_t>;
 
-  auto nums_col        = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(0)};
-  auto strings_col     = strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)};
-  auto nuther_nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)};
+  auto nums_col    = nums{{0, 1, 22, 33, 44, 55, 66}, cudf::test::iterators::null_at(0)};
+  auto strings_col = cudf::test::strings_column_wrapper{
+    {"", "1", "22", "333", "4444", "55555", "666666"}, cudf::test::iterators::null_at(1)};
+  auto nuther_nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, cudf::test::iterators::null_at(6)};
 
   auto table = cudf::table_view{{nums_col, strings_col, nuther_nums_col}};
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(table,
-                                flatten_nested_columns(table, {}, {}, column_nullability::FORCE));
+                                cudf::structs::detail::flatten_nested_columns(
+                                  table, {}, {}, cudf::structs::detail::column_nullability::FORCE));
 }
 
 TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStruct)
 {
   using T    = TypeParam;
-  using nums = fixed_width_column_wrapper<T, int32_t>;
+  using nums = cudf::test::fixed_width_column_wrapper<T, int32_t>;
 
-  auto nums_member    = nums{{0, 1, 22, 333, 44, 55, 66}, null_at(0)};
-  auto strings_member = strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)};
-  auto structs_col    = structs{{nums_member, strings_member}};
-  auto nums_col       = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)};
-  auto table          = cudf::table_view{{nums_col, structs_col}};
+  auto nums_member    = nums{{0, 1, 22, 333, 44, 55, 66}, cudf::test::iterators::null_at(0)};
+  auto strings_member = cudf::test::strings_column_wrapper{
+    {"", "1", "22", "333", "4444", "55555", "666666"}, cudf::test::iterators::null_at(1)};
+  auto structs_col = cudf::test::structs_column_wrapper{{nums_member, strings_member}};
+  auto nums_col    = nums{{0, 1, 2, 3, 4, 5, 6}, cudf::test::iterators::null_at(6)};
+  auto table       = cudf::table_view{{nums_col, structs_col}};
 
   auto expected_nums_col_1  = cudf::column(nums_col);
-  auto expected_structs_col = bools{{1, 1, 1, 1, 1, 1, 1}};
+  auto expected_structs_col = cudf::test::fixed_width_column_wrapper<bool>{{1, 1, 1, 1, 1, 1, 1}};
   auto expected_nums_col_2 =
     cudf::column(static_cast<cudf::structs_column_view>(structs_col).get_sliced_child(0));
   auto expected_strings_col =
@@ -120,22 +117,26 @@ TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStruct)
     {expected_nums_col_1, expected_structs_col, expected_nums_col_2, expected_strings_col}};
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected,
-                                flatten_nested_columns(table, {}, {}, column_nullability::FORCE));
+                                cudf::structs::detail::flatten_nested_columns(
+                                  table, {}, {}, cudf::structs::detail::column_nullability::FORCE));
 }
 
 TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStructWithNulls)
 {
   using T    = TypeParam;
-  using nums = fixed_width_column_wrapper<T, int32_t>;
+  using nums = cudf::test::fixed_width_column_wrapper<T, int32_t>;
 
-  auto nums_member    = nums{{0, 1, 22, 333, 44, 55, 66}, null_at(0)};
-  auto strings_member = strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)};
-  auto structs_col    = structs{{nums_member, strings_member}, null_at(2)};
-  auto nums_col       = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)};
-  auto table          = cudf::table_view{{nums_col, structs_col}};
+  auto nums_member    = nums{{0, 1, 22, 333, 44, 55, 66}, cudf::test::iterators::null_at(0)};
+  auto strings_member = cudf::test::strings_column_wrapper{
+    {"", "1", "22", "333", "4444", "55555", "666666"}, cudf::test::iterators::null_at(1)};
+  auto structs_col = cudf::test::structs_column_wrapper{{nums_member, strings_member},
+                                                        cudf::test::iterators::null_at(2)};
+  auto nums_col    = nums{{0, 1, 2, 3, 4, 5, 6}, cudf::test::iterators::null_at(6)};
+  auto table       = cudf::table_view{{nums_col, structs_col}};
 
   auto expected_nums_col_1  = cudf::column(nums_col);
-  auto expected_structs_col = bools{{1, 1, 0, 1, 1, 1, 1}, null_at(2)};
+  auto expected_structs_col = cudf::test::fixed_width_column_wrapper<bool>{
+    {1, 1, 0, 1, 1, 1, 1}, cudf::test::iterators::null_at(2)};
   auto expected_nums_col_2 =
     cudf::column(static_cast<cudf::structs_column_view>(structs_col).get_sliced_child(0));
   auto expected_strings_col =
@@ -144,30 +145,33 @@ TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStructWithNulls)
     {expected_nums_col_1, expected_structs_col, expected_nums_col_2, expected_strings_col}};
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected,
-                                flatten_nested_columns(table, {}, {}, column_nullability::FORCE));
+                                cudf::structs::detail::flatten_nested_columns(
+                                  table, {}, {}, cudf::structs::detail::column_nullability::FORCE));
 }
 
 TYPED_TEST(TypedStructUtilitiesTest, StructOfStruct)
 {
   using T    = TypeParam;
-  using nums = fixed_width_column_wrapper<T, int32_t>;
+  using nums = cudf::test::fixed_width_column_wrapper<T, int32_t>;
 
-  auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)};
+  auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, cudf::test::iterators::null_at(6)};
 
-  auto struct_0_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(0)};
-  auto struct_0_strings_member =
-    strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)};
-  auto structs_1_structs_member = structs{{struct_0_nums_member, struct_0_strings_member}};
+  auto struct_0_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, cudf::test::iterators::null_at(0)};
+  auto struct_0_strings_member = cudf::test::strings_column_wrapper{
+    {"", "1", "22", "333", "4444", "55555", "666666"}, cudf::test::iterators::null_at(1)};
+  auto structs_1_structs_member =
+    cudf::test::structs_column_wrapper{{struct_0_nums_member, struct_0_strings_member}};
 
-  auto struct_1_nums_member  = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(3)};
-  auto struct_of_structs_col = structs{{struct_1_nums_member, structs_1_structs_member}};
-  auto table                 = cudf::table_view{{nums_col, struct_of_structs_col}};
+  auto struct_1_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, cudf::test::iterators::null_at(3)};
+  auto struct_of_structs_col =
+    cudf::test::structs_column_wrapper{{struct_1_nums_member, structs_1_structs_member}};
+  auto table = cudf::table_view{{nums_col, struct_of_structs_col}};
 
   auto expected_nums_col_1    = cudf::column(nums_col);
-  auto expected_structs_col_1 = bools{{1, 1, 1, 1, 1, 1, 1}};
+  auto expected_structs_col_1 = cudf::test::fixed_width_column_wrapper<bool>{{1, 1, 1, 1, 1, 1, 1}};
   auto expected_nums_col_2 =
     cudf::column(static_cast<cudf::structs_column_view>(struct_of_structs_col).get_sliced_child(0));
-  auto expected_structs_col_2 = bools{{1, 1, 1, 1, 1, 1, 1}};
+  auto expected_structs_col_2 = cudf::test::fixed_width_column_wrapper<bool>{{1, 1, 1, 1, 1, 1, 1}};
   auto expected_nums_col_3    = cudf::column(
     static_cast<cudf::structs_column_view>(struct_of_structs_col).get_sliced_child(1).child(0));
   auto expected_strings_col = cudf::column(
@@ -180,32 +184,35 @@ TYPED_TEST(TypedStructUtilitiesTest, StructOfStruct)
                                     expected_strings_col}};
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected,
-                                flatten_nested_columns(table, {}, {}, column_nullability::FORCE));
+                                cudf::structs::detail::flatten_nested_columns(
+                                  table, {}, {}, cudf::structs::detail::column_nullability::FORCE));
 }
 
 TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtLeafLevel)
 {
   using T    = TypeParam;
-  using nums = fixed_width_column_wrapper<T, int32_t>;
+  using nums = cudf::test::fixed_width_column_wrapper<T, int32_t>;
 
-  auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)};
+  auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, cudf::test::iterators::null_at(6)};
 
-  auto struct_0_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(0)};
-  auto struct_0_strings_member =
-    strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)};
-  auto structs_1_structs_member =
-    structs{{struct_0_nums_member, struct_0_strings_member}, null_at(2)};
+  auto struct_0_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, cudf::test::iterators::null_at(0)};
+  auto struct_0_strings_member = cudf::test::strings_column_wrapper{
+    {"", "1", "22", "333", "4444", "55555", "666666"}, cudf::test::iterators::null_at(1)};
+  auto structs_1_structs_member = cudf::test::structs_column_wrapper{
+    {struct_0_nums_member, struct_0_strings_member}, cudf::test::iterators::null_at(2)};
 
-  auto struct_1_nums_member  = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(3)};
-  auto struct_of_structs_col = structs{{struct_1_nums_member, structs_1_structs_member}};
-  auto table                 = cudf::table_view{{nums_col, struct_of_structs_col}};
+  auto struct_1_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, cudf::test::iterators::null_at(3)};
+  auto struct_of_structs_col =
+    cudf::test::structs_column_wrapper{{struct_1_nums_member, structs_1_structs_member}};
+  auto table = cudf::table_view{{nums_col, struct_of_structs_col}};
 
   auto expected_nums_col_1    = cudf::column(nums_col);
-  auto expected_structs_col_1 = bools{{1, 1, 1, 1, 1, 1, 1}};
+  auto expected_structs_col_1 = cudf::test::fixed_width_column_wrapper<bool>{{1, 1, 1, 1, 1, 1, 1}};
   auto expected_nums_col_2 =
     cudf::column(static_cast<cudf::structs_column_view>(struct_of_structs_col).get_sliced_child(0));
-  auto expected_structs_col_2 = bools{{1, 1, 0, 1, 1, 1, 1}, null_at(2)};
-  auto expected_nums_col_3    = cudf::column(
+  auto expected_structs_col_2 = cudf::test::fixed_width_column_wrapper<bool>{
+    {1, 1, 0, 1, 1, 1, 1}, cudf::test::iterators::null_at(2)};
+  auto expected_nums_col_3 = cudf::column(
     static_cast<cudf::structs_column_view>(struct_of_structs_col).get_sliced_child(1).child(0));
   auto expected_strings_col = cudf::column(
     static_cast<cudf::structs_column_view>(struct_of_structs_col).get_sliced_child(1).child(1));
@@ -217,32 +224,36 @@ TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtLeafLevel)
                                     expected_strings_col}};
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected,
-                                flatten_nested_columns(table, {}, {}, column_nullability::FORCE));
+                                cudf::structs::detail::flatten_nested_columns(
+                                  table, {}, {}, cudf::structs::detail::column_nullability::FORCE));
 }
 
 TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtTopLevel)
 {
   using T    = TypeParam;
-  using nums = fixed_width_column_wrapper<T, int32_t>;
+  using nums = cudf::test::fixed_width_column_wrapper<T, int32_t>;
 
-  auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)};
+  auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, cudf::test::iterators::null_at(6)};
 
-  auto struct_0_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(0)};
-  auto struct_0_strings_member =
-    strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)};
-  auto structs_1_structs_member = structs{{struct_0_nums_member, struct_0_strings_member}};
+  auto struct_0_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, cudf::test::iterators::null_at(0)};
+  auto struct_0_strings_member = cudf::test::strings_column_wrapper{
+    {"", "1", "22", "333", "4444", "55555", "666666"}, cudf::test::iterators::null_at(1)};
+  auto structs_1_structs_member =
+    cudf::test::structs_column_wrapper{{struct_0_nums_member, struct_0_strings_member}};
 
-  auto struct_1_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(3)};
-  auto struct_of_structs_col =
-    structs{{struct_1_nums_member, structs_1_structs_member}, null_at(4)};
+  auto struct_1_nums_member  = nums{{0, 1, 22, 33, 44, 55, 66}, cudf::test::iterators::null_at(3)};
+  auto struct_of_structs_col = cudf::test::structs_column_wrapper{
+    {struct_1_nums_member, structs_1_structs_member}, cudf::test::iterators::null_at(4)};
   auto table = cudf::table_view{{nums_col, struct_of_structs_col}};
 
   auto expected_nums_col_1    = cudf::column(nums_col);
-  auto expected_structs_col_1 = bools{{1, 1, 1, 1, 0, 1, 1}, null_at(4)};
+  auto expected_structs_col_1 = cudf::test::fixed_width_column_wrapper<bool>{
+    {1, 1, 1, 1, 0, 1, 1}, cudf::test::iterators::null_at(4)};
   auto expected_nums_col_2 =
     cudf::column(static_cast<cudf::structs_column_view>(struct_of_structs_col).get_sliced_child(0));
-  auto expected_structs_col_2 = bools{{1, 1, 1, 1, 0, 1, 1}, null_at(4)};
-  auto expected_nums_col_3    = cudf::column(
+  auto expected_structs_col_2 = cudf::test::fixed_width_column_wrapper<bool>{
+    {1, 1, 1, 1, 0, 1, 1}, cudf::test::iterators::null_at(4)};
+  auto expected_nums_col_3 = cudf::column(
     static_cast<cudf::structs_column_view>(struct_of_structs_col).get_sliced_child(1).child(0));
   auto expected_strings_col = cudf::column(
     static_cast<cudf::structs_column_view>(struct_of_structs_col).get_sliced_child(1).child(1));
@@ -254,33 +265,36 @@ TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtTopLevel)
                                     expected_strings_col}};
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected,
-                                flatten_nested_columns(table, {}, {}, column_nullability::FORCE));
+                                cudf::structs::detail::flatten_nested_columns(
+                                  table, {}, {}, cudf::structs::detail::column_nullability::FORCE));
 }
 
 TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtAllLevels)
 {
   using T    = TypeParam;
-  using nums = fixed_width_column_wrapper<T, int32_t>;
+  using nums = cudf::test::fixed_width_column_wrapper<T, int32_t>;
 
-  auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)};
+  auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, cudf::test::iterators::null_at(6)};
 
-  auto struct_0_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(0)};
-  auto struct_0_strings_member =
-    strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)};
-  auto structs_1_structs_member =
-    structs{{struct_0_nums_member, struct_0_strings_member}, null_at(2)};
+  auto struct_0_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, cudf::test::iterators::null_at(0)};
+  auto struct_0_strings_member = cudf::test::strings_column_wrapper{
+    {"", "1", "22", "333", "4444", "55555", "666666"}, cudf::test::iterators::null_at(1)};
+  auto structs_1_structs_member = cudf::test::structs_column_wrapper{
+    {struct_0_nums_member, struct_0_strings_member}, cudf::test::iterators::null_at(2)};
 
-  auto struct_1_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(3)};
-  auto struct_of_structs_col =
-    structs{{struct_1_nums_member, structs_1_structs_member}, null_at(4)};
+  auto struct_1_nums_member  = nums{{0, 1, 22, 33, 44, 55, 66}, cudf::test::iterators::null_at(3)};
+  auto struct_of_structs_col = cudf::test::structs_column_wrapper{
+    {struct_1_nums_member, structs_1_structs_member}, cudf::test::iterators::null_at(4)};
   auto table = cudf::table_view{{nums_col, struct_of_structs_col}};
 
   auto expected_nums_col_1    = cudf::column(nums_col);
-  auto expected_structs_col_1 = bools{{1, 1, 1, 1, 0, 1, 1}, null_at(4)};
+  auto expected_structs_col_1 = cudf::test::fixed_width_column_wrapper<bool>{
+    {1, 1, 1, 1, 0, 1, 1}, cudf::test::iterators::null_at(4)};
   auto expected_nums_col_2 =
     cudf::column(static_cast<cudf::structs_column_view>(struct_of_structs_col).get_sliced_child(0));
-  auto expected_structs_col_2 = bools{{1, 1, 0, 1, 0, 1, 1}, {1, 1, 0, 1, 0, 1, 1}};
-  auto expected_nums_col_3    = cudf::column(
+  auto expected_structs_col_2 =
+    cudf::test::fixed_width_column_wrapper<bool>{{1, 1, 0, 1, 0, 1, 1}, {1, 1, 0, 1, 0, 1, 1}};
+  auto expected_nums_col_3 = cudf::column(
     static_cast<cudf::structs_column_view>(struct_of_structs_col).get_sliced_child(1).child(0));
   auto expected_strings_col = cudf::column(
     static_cast<cudf::structs_column_view>(struct_of_structs_col).get_sliced_child(1).child(1));
@@ -292,25 +306,29 @@ TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtAllLevels)
                                     expected_strings_col}};
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected,
-                                flatten_nested_columns(table, {}, {}, column_nullability::FORCE));
+                                cudf::structs::detail::flatten_nested_columns(
+                                  table, {}, {}, cudf::structs::detail::column_nullability::FORCE));
 }
 
 TYPED_TEST(TypedStructUtilitiesTest, ListsAreUnsupported)
 {
   using T    = TypeParam;
-  using ints = fixed_width_column_wrapper<int32_t>;
-  using lcw  = lists_column_wrapper<T, int32_t>;
+  using ints = cudf::test::fixed_width_column_wrapper<int32_t>;
+  using lcw  = cudf::test::lists_column_wrapper<T, int32_t>;
 
   // clang-format off
   auto lists_member = lcw{  {0,1,2}, {3,4,5}, {6,7,8,9} };
   auto ints_member  = ints{       0,       1,         2 };
   // clang-format on
 
-  auto structs_with_lists_col = structs{lists_member, ints_member};
+  auto structs_with_lists_col = cudf::test::structs_column_wrapper{lists_member, ints_member};
 
-  EXPECT_THROW(flatten_nested_columns(
-                 cudf::table_view{{structs_with_lists_col}}, {}, {}, column_nullability::FORCE),
-               cudf::logic_error);
+  EXPECT_THROW(
+    cudf::structs::detail::flatten_nested_columns(cudf::table_view{{structs_with_lists_col}},
+                                                  {},
+                                                  {},
+                                                  cudf::structs::detail::column_nullability::FORCE),
+    cudf::logic_error);
 }
 
 struct SuperimposeTest : StructUtilitiesTest {
@@ -320,7 +338,7 @@ template <typename T>
 struct TypedSuperimposeTest : StructUtilitiesTest {
 };
 
-TYPED_TEST_SUITE(TypedSuperimposeTest, FixedWidthTypes);
+TYPED_TEST_SUITE(TypedSuperimposeTest, cudf::test::FixedWidthTypes);
 
 void test_non_struct_columns(cudf::column_view const& input)
 {
@@ -336,18 +354,21 @@ TYPED_TEST(TypedSuperimposeTest, NoStructInput)
 {
   using T = TypeParam;
 
-  test_non_struct_columns(fixed_width_column_wrapper<T>{{6, 5, 4, 3, 2, 1, 0}, null_at(3)});
-  test_non_struct_columns(
-    lists_column_wrapper<T, int32_t>{{{6, 5}, {4, 3}, {2, 1}, {0}}, null_at(3)});
-  test_non_struct_columns(strings{{"All", "The", "Leaves", "Are", "Brown"}, null_at(3)});
-  test_non_struct_columns(dictionary{{"All", "The", "Leaves", "Are", "Brown"}, null_at(3)});
+  test_non_struct_columns(cudf::test::fixed_width_column_wrapper<T>{
+    {6, 5, 4, 3, 2, 1, 0}, cudf::test::iterators::null_at(3)});
+  test_non_struct_columns(cudf::test::lists_column_wrapper<T, int32_t>{
+    {{6, 5}, {4, 3}, {2, 1}, {0}}, cudf::test::iterators::null_at(3)});
+  test_non_struct_columns(cudf::test::strings_column_wrapper{
+    {"All", "The", "Leaves", "Are", "Brown"}, cudf::test::iterators::null_at(3)});
+  test_non_struct_columns(cudf::test::dictionary_column_wrapper<std::string>{
+    {"All", "The", "Leaves", "Are", "Brown"}, cudf::test::iterators::null_at(3)});
 }
 
 /**
  * @brief Helper to construct a numeric member of a struct column.
  */
 template <typename T, typename NullIter>
-nums<T> make_nums_member(NullIter null_iter = no_nulls())
+nums<T> make_nums_member(NullIter null_iter = cudf::test::iterators::no_nulls())
 {
   return nums<T>{{10, 11, 12, 13, 14, 15, 16}, null_iter};
 }
@@ -356,7 +377,7 @@ nums<T> make_nums_member(NullIter null_iter = no_nulls())
  * @brief Helper to construct a lists member of a struct column.
  */
 template <typename T, typename NullIter>
-lists<T> make_lists_member(NullIter null_iter = no_nulls())
+lists<T> make_lists_member(NullIter null_iter = cudf::test::iterators::no_nulls())
 {
   return lists<T>{{{20, 20}, {21, 21}, {22, 22}, {23, 23}, {24, 24}, {25, 25}, {26, 26}},
                   null_iter};
@@ -366,9 +387,11 @@ TYPED_TEST(TypedSuperimposeTest, BasicStruct)
 {
   using T = TypeParam;
 
-  auto nums_member   = make_nums_member<T>(nulls_at({3, 6}));
-  auto lists_member  = make_lists_member<T>(nulls_at({4, 5}));
-  auto structs_input = structs{{nums_member, lists_member}, no_nulls()}.release();
+  auto nums_member   = make_nums_member<T>(cudf::test::iterators::nulls_at({3, 6}));
+  auto lists_member  = make_lists_member<T>(cudf::test::iterators::nulls_at({4, 5}));
+  auto structs_input = cudf::test::structs_column_wrapper{{nums_member, lists_member},
+                                                          cudf::test::iterators::no_nulls()}
+                         .release();
 
   // Reset STRUCTs' null-mask. Mark first STRUCT row as null.
   auto structs_view = structs_input->mutable_view();
@@ -376,17 +399,19 @@ TYPED_TEST(TypedSuperimposeTest, BasicStruct)
 
   // At this point, the STRUCT nulls aren't pushed down to members,
   // even though the parent null-mask was modified.
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(structs_view.child(0), make_nums_member<T>(nulls_at({3, 6})));
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(structs_view.child(1),
-                                      make_lists_member<T>(nulls_at({4, 5})));
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(structs_view.child(0),
+                                      make_nums_member<T>(cudf::test::iterators::nulls_at({3, 6})));
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(
+    structs_view.child(1), make_lists_member<T>(cudf::test::iterators::nulls_at({4, 5})));
 
   auto [output, backing_buffers] = cudf::structs::detail::superimpose_parent_nulls(structs_view);
 
   // After superimpose_parent_nulls(), the struct nulls (i.e. at index-0) should have been pushed
   // down to the children. All members should have nulls at row-index 0.
-  auto expected_nums_member    = make_nums_member<T>(nulls_at({0, 3, 6}));
-  auto expected_lists_member   = make_lists_member<T>(nulls_at({0, 4, 5}));
-  auto expected_structs_output = structs{{expected_nums_member, expected_lists_member}, null_at(0)};
+  auto expected_nums_member    = make_nums_member<T>(cudf::test::iterators::nulls_at({0, 3, 6}));
+  auto expected_lists_member   = make_lists_member<T>(cudf::test::iterators::nulls_at({0, 4, 5}));
+  auto expected_structs_output = cudf::test::structs_column_wrapper{
+    {expected_nums_member, expected_lists_member}, cudf::test::iterators::null_at(0)};
 
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(output, expected_structs_output);
 }
@@ -398,18 +423,21 @@ TYPED_TEST(TypedSuperimposeTest, NonNullableParentStruct)
 
   using T = TypeParam;
 
-  auto nums_member   = make_nums_member<T>(nulls_at({3, 6}));
-  auto lists_member  = make_lists_member<T>(nulls_at({4, 5}));
-  auto structs_input = structs{{nums_member, lists_member}, no_nulls()}.release();
+  auto nums_member   = make_nums_member<T>(cudf::test::iterators::nulls_at({3, 6}));
+  auto lists_member  = make_lists_member<T>(cudf::test::iterators::nulls_at({4, 5}));
+  auto structs_input = cudf::test::structs_column_wrapper{{nums_member, lists_member},
+                                                          cudf::test::iterators::no_nulls()}
+                         .release();
 
   auto [output, backing_buffers] =
     cudf::structs::detail::superimpose_parent_nulls(structs_input->view());
 
   // After superimpose_parent_nulls(), none of the child structs should have changed,
   // because the parent had no nulls to begin with.
-  auto expected_nums_member    = make_nums_member<T>(nulls_at({3, 6}));
-  auto expected_lists_member   = make_lists_member<T>(nulls_at({4, 5}));
-  auto expected_structs_output = structs{{expected_nums_member, expected_lists_member}, no_nulls()};
+  auto expected_nums_member    = make_nums_member<T>(cudf::test::iterators::nulls_at({3, 6}));
+  auto expected_lists_member   = make_lists_member<T>(cudf::test::iterators::nulls_at({4, 5}));
+  auto expected_structs_output = cudf::test::structs_column_wrapper{
+    {expected_nums_member, expected_lists_member}, cudf::test::iterators::no_nulls()};
 
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(output, expected_structs_output);
 }
@@ -422,26 +450,30 @@ TYPED_TEST(TypedSuperimposeTest, NestedStruct_ChildNullable_ParentNonNullable)
 
   using T = TypeParam;
 
-  auto nums_member          = make_nums_member<T>(nulls_at({3, 6}));
-  auto lists_member         = make_lists_member<T>(nulls_at({4, 5}));
+  auto nums_member          = make_nums_member<T>(cudf::test::iterators::nulls_at({3, 6}));
+  auto lists_member         = make_lists_member<T>(cudf::test::iterators::nulls_at({4, 5}));
   auto outer_struct_members = std::vector<std::unique_ptr<cudf::column>>{};
-  outer_struct_members.push_back(structs{{nums_member, lists_member}, no_nulls()}.release());
+  outer_struct_members.push_back(cudf::test::structs_column_wrapper{
+    {nums_member, lists_member}, cudf::test::iterators::no_nulls()}
+                                   .release());
 
   // Reset STRUCTs' null-mask. Mark first STRUCT row as null.
   auto structs_view = outer_struct_members.back()->mutable_view();
   cudf::detail::set_null_mask(structs_view.null_mask(), 0, 1, false);
 
-  auto structs_of_structs = structs{std::move(outer_struct_members)}.release();
+  auto structs_of_structs =
+    cudf::test::structs_column_wrapper{std::move(outer_struct_members)}.release();
 
   auto [output, backing_buffers] =
     cudf::structs::detail::superimpose_parent_nulls(structs_of_structs->view());
 
   // After superimpose_parent_nulls(), outer-struct column should not have pushed nulls to child
   // structs. But the child struct column must push its nulls to its own children.
-  auto expected_nums_member  = make_nums_member<T>(nulls_at({0, 3, 6}));
-  auto expected_lists_member = make_lists_member<T>(nulls_at({0, 4, 5}));
-  auto expected_structs      = structs{{expected_nums_member, expected_lists_member}, null_at(0)};
-  auto expected_structs_of_structs = structs{{expected_structs}};
+  auto expected_nums_member  = make_nums_member<T>(cudf::test::iterators::nulls_at({0, 3, 6}));
+  auto expected_lists_member = make_lists_member<T>(cudf::test::iterators::nulls_at({0, 4, 5}));
+  auto expected_structs      = cudf::test::structs_column_wrapper{
+    {expected_nums_member, expected_lists_member}, cudf::test::iterators::null_at(0)};
+  auto expected_structs_of_structs = cudf::test::structs_column_wrapper{{expected_structs}};
 
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(output, expected_structs_of_structs);
 }
@@ -454,18 +486,21 @@ TYPED_TEST(TypedSuperimposeTest, NestedStruct_ChildNullable_ParentNullable)
 
   using T = TypeParam;
 
-  auto nums_member          = make_nums_member<T>(nulls_at({3, 6}));
-  auto lists_member         = make_lists_member<T>(nulls_at({4, 5}));
+  auto nums_member          = make_nums_member<T>(cudf::test::iterators::nulls_at({3, 6}));
+  auto lists_member         = make_lists_member<T>(cudf::test::iterators::nulls_at({4, 5}));
   auto outer_struct_members = std::vector<std::unique_ptr<cudf::column>>{};
-  outer_struct_members.push_back(structs{{nums_member, lists_member}, no_nulls()}.release());
+  outer_struct_members.push_back(cudf::test::structs_column_wrapper{
+    {nums_member, lists_member}, cudf::test::iterators::no_nulls()}
+                                   .release());
 
   // Reset STRUCTs' null-mask. Mark first STRUCT row as null.
   auto structs_view = outer_struct_members.back()->mutable_view();
   auto num_rows     = structs_view.size();
   cudf::detail::set_null_mask(structs_view.null_mask(), 0, 1, false);
 
-  auto structs_of_structs =
-    structs{std::move(outer_struct_members), std::vector<bool>(num_rows, true)}.release();
+  auto structs_of_structs = cudf::test::structs_column_wrapper{std::move(outer_struct_members),
+                                                               std::vector<bool>(num_rows, true)}
+                              .release();
 
   // Modify STRUCT-of-STRUCT's null-mask. Mark second STRUCT row as null.
   auto structs_of_structs_view = structs_of_structs->mutable_view();
@@ -476,10 +511,12 @@ TYPED_TEST(TypedSuperimposeTest, NestedStruct_ChildNullable_ParentNullable)
 
   // After superimpose_parent_nulls(), outer-struct column should not have pushed nulls to child
   // structs. But the child struct column must push its nulls to its own children.
-  auto expected_nums_member  = make_nums_member<T>(nulls_at({0, 1, 3, 6}));
-  auto expected_lists_member = make_lists_member<T>(nulls_at({0, 1, 4, 5}));
-  auto expected_structs = structs{{expected_nums_member, expected_lists_member}, nulls_at({0, 1})};
-  auto expected_structs_of_structs = structs{{expected_structs}, null_at(1)};
+  auto expected_nums_member  = make_nums_member<T>(cudf::test::iterators::nulls_at({0, 1, 3, 6}));
+  auto expected_lists_member = make_lists_member<T>(cudf::test::iterators::nulls_at({0, 1, 4, 5}));
+  auto expected_structs      = cudf::test::structs_column_wrapper{
+    {expected_nums_member, expected_lists_member}, cudf::test::iterators::nulls_at({0, 1})};
+  auto expected_structs_of_structs =
+    cudf::test::structs_column_wrapper{{expected_structs}, cudf::test::iterators::null_at(1)};
 
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(output, expected_structs_of_structs);
 }
@@ -489,7 +526,7 @@ cudf::column_view slice_off_first_and_last_rows(cudf::column_view const& col)
   return cudf::slice(col, {1, col.size() - 1})[0];
 }
 
-void mark_row_as_null(cudf::mutable_column_view const& col, size_type row_index)
+void mark_row_as_null(cudf::mutable_column_view const& col, cudf::size_type row_index)
 {
   cudf::detail::set_null_mask(col.null_mask(), row_index, row_index + 1, false);
 }
@@ -502,9 +539,11 @@ TYPED_TEST(TypedSuperimposeTest, Struct_Sliced)
 
   using T = TypeParam;
 
-  auto nums_member    = make_nums_member<T>(nulls_at({3, 6}));
-  auto lists_member   = make_lists_member<T>(nulls_at({4, 5}));
-  auto structs_column = structs{{nums_member, lists_member}, no_nulls()}.release();
+  auto nums_member    = make_nums_member<T>(cudf::test::iterators::nulls_at({3, 6}));
+  auto lists_member   = make_lists_member<T>(cudf::test::iterators::nulls_at({4, 5}));
+  auto structs_column = cudf::test::structs_column_wrapper{{nums_member, lists_member},
+                                                           cudf::test::iterators::no_nulls()}
+                          .release();
 
   // Reset STRUCTs' null-mask. Mark second STRUCT row as null.
   mark_row_as_null(structs_column->mutable_view(), 1);
@@ -531,10 +570,11 @@ TYPED_TEST(TypedSuperimposeTest, Struct_Sliced)
 
   // Construct expected columns using structs_column_wrapper, which should push the parent nulls
   // down automatically. Then, slice() off the ends.
-  auto expected_nums             = make_nums_member<T>(nulls_at({1, 3, 6}));
-  auto expected_lists            = make_lists_member<T>(nulls_at({1, 4, 5}));
-  auto expected_unsliced_structs = structs{{expected_nums, expected_lists}, nulls_at({1})};
-  auto expected_structs          = slice_off_first_and_last_rows(expected_unsliced_structs);
+  auto expected_nums             = make_nums_member<T>(cudf::test::iterators::nulls_at({1, 3, 6}));
+  auto expected_lists            = make_lists_member<T>(cudf::test::iterators::nulls_at({1, 4, 5}));
+  auto expected_unsliced_structs = cudf::test::structs_column_wrapper{
+    {expected_nums, expected_lists}, cudf::test::iterators::nulls_at({1})};
+  auto expected_structs = slice_off_first_and_last_rows(expected_unsliced_structs);
 
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(output, expected_structs);
 }
@@ -547,10 +587,13 @@ TYPED_TEST(TypedSuperimposeTest, NestedStruct_Sliced)
 
   using T = TypeParam;
 
-  auto nums_member           = make_nums_member<T>(nulls_at({3, 6}));
-  auto lists_member          = make_lists_member<T>(nulls_at({4, 5}));
-  auto structs_column        = structs{{nums_member, lists_member}, null_at(1)};
-  auto struct_structs_column = structs{{structs_column}, no_nulls()}.release();
+  auto nums_member    = make_nums_member<T>(cudf::test::iterators::nulls_at({3, 6}));
+  auto lists_member   = make_lists_member<T>(cudf::test::iterators::nulls_at({4, 5}));
+  auto structs_column = cudf::test::structs_column_wrapper{{nums_member, lists_member},
+                                                           cudf::test::iterators::null_at(1)};
+  auto struct_structs_column =
+    cudf::test::structs_column_wrapper{{structs_column}, cudf::test::iterators::no_nulls()}
+      .release();
 
   // Reset STRUCT<STRUCT>'s null-mask. Mark third row as null.
   mark_row_as_null(struct_structs_column->mutable_view(), 2);
@@ -580,13 +623,13 @@ TYPED_TEST(TypedSuperimposeTest, NestedStruct_Sliced)
 
   // Construct expected columns using structs_column_wrapper, which should push the parent nulls
   // down automatically. Then, slice() off the ends.
-  auto expected_nums           = make_nums_member<T>(nulls_at({3, 6}));
-  auto expected_lists          = make_lists_member<T>(nulls_at({4, 5}));
-  auto expected_structs        = structs{{expected_nums, expected_lists}, nulls_at({1})};
-  auto expected_struct_structs = structs{{expected_structs}, null_at(2)};
+  auto expected_nums    = make_nums_member<T>(cudf::test::iterators::nulls_at({3, 6}));
+  auto expected_lists   = make_lists_member<T>(cudf::test::iterators::nulls_at({4, 5}));
+  auto expected_structs = cudf::test::structs_column_wrapper{{expected_nums, expected_lists},
+                                                             cudf::test::iterators::nulls_at({1})};
+  auto expected_struct_structs =
+    cudf::test::structs_column_wrapper{{expected_structs}, cudf::test::iterators::null_at(2)};
   auto expected_sliced_structs = slice_off_first_and_last_rows(expected_struct_structs);
 
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(output, expected_sliced_structs);
 }
-
-}  // namespace cudf::test

From c806b10b389094952b4c4fb1ae2edb18b38e61e2 Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Mon, 24 Oct 2022 13:54:41 -0400
Subject: [PATCH 064/202] Use gather-based strings factory in
 cudf::strings::strip (#11954)

Simplifies the `cudf::strings::strip` function to use the `cudf::make_strings_column` overload that accepts an iterator of pairs. This factory has a highly tuned gather implementation for building a strings column from a vector (iterator) of strings in device memory.
This was inspired by the review and work in #11946. This also gives a small improvement in the performance of small columns of large strings and even more improvement in large columns of large-ish strings for strip.
No functionality has changed; only the internal implementation has been simplified.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Tobias Ribizel (https://github.com/upsj)

URL: https://github.com/rapidsai/cudf/pull/11954
---
 cpp/benchmarks/string/filter.cpp | 26 ++++++++------------
 cpp/src/strings/strip.cu         | 42 +++++++++++---------------------
 2 files changed, 24 insertions(+), 44 deletions(-)

diff --git a/cpp/benchmarks/string/filter.cpp b/cpp/benchmarks/string/filter.cpp
index f07c11ee6ca..cb805539651 100644
--- a/cpp/benchmarks/string/filter.cpp
+++ b/cpp/benchmarks/string/filter.cpp
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+#include "string_bench_args.hpp"
+
 #include <benchmarks/common/generate_input.hpp>
 #include <benchmarks/fixture/benchmark_fixture.hpp>
 #include <benchmarks/synchronization/synchronization.hpp>
@@ -27,7 +29,6 @@
 #include <cudf/strings/translate.hpp>
 #include <cudf/utilities/default_stream.hpp>
 
-#include <limits>
 #include <vector>
 
 enum FilterAPI { filter, filter_chars, strip };
@@ -62,21 +63,14 @@ static void BM_filter_chars(benchmark::State& state, FilterAPI api)
 
 static void generate_bench_args(benchmark::internal::Benchmark* b)
 {
-  int const min_rows   = 1 << 12;
-  int const max_rows   = 1 << 24;
-  int const row_mult   = 8;
-  int const min_rowlen = 1 << 5;
-  int const max_rowlen = 1 << 13;
-  int const len_mult   = 4;
-  for (int row_count = min_rows; row_count <= max_rows; row_count *= row_mult) {
-    for (int rowlen = min_rowlen; rowlen <= max_rowlen; rowlen *= len_mult) {
-      // avoid generating combinations that exceed the cudf column limit
-      size_t total_chars = static_cast<size_t>(row_count) * rowlen;
-      if (total_chars < static_cast<size_t>(std::numeric_limits<cudf::size_type>::max())) {
-        b->Args({row_count, rowlen});
-      }
-    }
-  }
+  int const min_rows          = 1 << 12;
+  int const max_rows          = 1 << 24;
+  int const row_multiplier    = 8;
+  int const min_length        = 1 << 5;
+  int const max_length        = 1 << 13;
+  int const length_multiplier = 2;
+  generate_string_bench_args(
+    b, min_rows, max_rows, row_multiplier, min_length, max_length, length_multiplier);
 }
 
 #define STRINGS_BENCHMARK_DEFINE(name)                                \
diff --git a/cpp/src/strings/strip.cu b/cpp/src/strings/strip.cu
index baa6a27b4ba..e982050b8d6 100644
--- a/cpp/src/strings/strip.cu
+++ b/cpp/src/strings/strip.cu
@@ -15,11 +15,9 @@
  */
 
 #include <cudf/column/column_device_view.cuh>
-#include <cudf/column/column_factories.hpp>
-#include <cudf/detail/null_mask.hpp>
 #include <cudf/detail/nvtx/ranges.hpp>
+#include <cudf/strings/detail/strings_column_factories.cuh>
 #include <cudf/strings/detail/strip.cuh>
-#include <cudf/strings/detail/utilities.cuh>
 #include <cudf/strings/string_view.cuh>
 #include <cudf/strings/strings_column_view.hpp>
 #include <cudf/strings/strip.hpp>
@@ -35,35 +33,24 @@ namespace detail {
 namespace {
 
 /**
- * @brief Strip characters from the beginning and/or end of a string.
+ * @brief Strip characters from the beginning and/or end of a string
  *
  * This functor strips the beginning and/or end of each string
  * of any characters found in d_to_strip or whitespace if
  * d_to_strip is empty.
  *
  */
-struct strip_fn {
+struct strip_transform_fn {
   column_device_view const d_strings;
   side_type const side;  // right, left, or both
   string_view const d_to_strip;
-  int32_t* d_offsets{};
-  char* d_chars{};
 
-  __device__ void operator()(size_type idx)
+  __device__ string_index_pair operator()(size_type idx)
   {
-    if (d_strings.is_null(idx)) {
-      if (!d_chars) d_offsets[idx] = 0;
-      return;
-    }
-
-    auto const d_str = d_strings.element<string_view>(idx);
-
+    if (d_strings.is_null(idx)) { return string_index_pair{nullptr, 0}; }
+    auto const d_str      = d_strings.element<string_view>(idx);
     auto const d_stripped = strip(d_str, d_to_strip, side);
-    if (d_chars) {
-      copy_string(d_chars + d_offsets[idx], d_stripped);
-    } else {
-      d_offsets[idx] = d_stripped.size_bytes();
-    }
+    return string_index_pair{d_stripped.data(), d_stripped.size_bytes()};
   }
 };
 
@@ -83,15 +70,14 @@ std::unique_ptr<column> strip(
 
   auto const d_column = column_device_view::create(input.parent(), stream);
 
-  // this utility calls the strip_fn to build the offsets and chars columns
-  auto children = cudf::strings::detail::make_strings_children(
-    strip_fn{*d_column, side, d_to_strip}, input.size(), stream, mr);
+  auto result = rmm::device_uvector<string_index_pair>(input.size(), stream);
+  thrust::transform(rmm::exec_policy(stream),
+                    thrust::counting_iterator<size_type>(0),
+                    thrust::counting_iterator<size_type>(input.size()),
+                    result.begin(),
+                    strip_transform_fn{*d_column, side, d_to_strip});
 
-  return make_strings_column(input.size(),
-                             std::move(children.first),
-                             std::move(children.second),
-                             input.null_count(),
-                             cudf::detail::copy_bitmask(input.parent(), stream, mr));
+  return make_strings_column(result.begin(), result.end(), stream, mr);
 }
 
 }  // namespace detail

From 1e93af87748f2c06f4e90da986e729d859b0b60a Mon Sep 17 00:00:00 2001
From: Alessandro Bellina <abellina@nvidia.com>
Date: Mon, 24 Oct 2022 14:33:40 -0500
Subject: [PATCH 065/202] Add gpu memory watermark apis to JNI (#11950)

This PR addresses https://github.com/rapidsai/cudf/issues/11949.

We are adding methods to get the current memory usage watermarks at the whole process level and adding a "scoped" maximum, where the user can reset the initial value, run cuDF functions, and then call the API to get what happened since the reset.

For the scoped maximum, `getScopedMaximumBytesAllocated` could have somewhat surprising results. If the scoped maximum is reset to 0, for example, and we only see frees for allocations done before the reset, the scoped maximum returned is 0. This is because our net memory usage since the reset is literally negative in this scenario.

The APIs here assume that the caller process is using a single thread to call into the GPU (for Spark it would be 1 concurrent task).

Note I assume `Rmm.initialize` has been called, otherwise this doesn't track allocations done before that.

Authors:
  - Alessandro Bellina (https://github.com/abellina)

Approvers:
  - Jim Brennan (https://github.com/jbrennan333)
  - Jason Lowe (https://github.com/jlowe)

URL: https://github.com/rapidsai/cudf/pull/11950
---
 java/src/main/java/ai/rapids/cudf/Rmm.java    | 43 ++++++++++
 java/src/main/native/src/RmmJni.cpp           | 71 ++++++++++++++++
 .../src/test/java/ai/rapids/cudf/RmmTest.java | 83 +++++++++++++++++++
 3 files changed, 197 insertions(+)

diff --git a/java/src/main/java/ai/rapids/cudf/Rmm.java b/java/src/main/java/ai/rapids/cudf/Rmm.java
index 730f82f0047..0b825937815 100755
--- a/java/src/main/java/ai/rapids/cudf/Rmm.java
+++ b/java/src/main/java/ai/rapids/cudf/Rmm.java
@@ -136,6 +136,49 @@ public static boolean isInitialized() throws RmmException {
    */
   public static native long getTotalBytesAllocated();
 
+  /**
+   * Returns the maximum amount of RMM memory (Bytes) outstanding during the
+   * lifetime of the process.
+   */
+  public static native long getMaximumTotalBytesAllocated();
+
+  /**
+   * Resets a scoped maximum counter of RMM memory used to keep track of usage between
+   * code sections while debugging.
+   *
+   * @param initialValue an initial value (in Bytes) to use for this scoped counter
+   */
+  public static void resetScopedMaximumBytesAllocated(long initialValue) {
+    resetScopedMaximumBytesAllocatedInternal(initialValue);
+  }
+
+  /**
+   * Resets a scoped maximum counter of RMM memory used to keep track of usage between
+   * code sections while debugging.
+   *
+   * This resets the counter to 0 Bytes.
+   */
+  public static void resetScopedMaximumBytesAllocated() {
+    resetScopedMaximumBytesAllocatedInternal(0L);
+  }
+
+  private static native void resetScopedMaximumBytesAllocatedInternal(long initialValue);
+
+  /**
+   * Returns the maximum amount of RMM memory (Bytes) outstanding since the last
+   * `resetScopedMaximumBytesAllocated` call was issued (it is "scoped" because it's the
+   * maximum amount seen since the last reset).
+   *
+   * If the memory used is net negative (for example if only frees happened since
+   * reset, and we reset to 0), then result will be 0.
+   *
+   * If `resetScopedMaximumBytesAllocated` is never called, the scope is the whole
+   * program and is equivalent to `getMaximumTotalBytesAllocated`.
+   *
+   * @return the scoped maximum bytes allocated
+   */
+  public static native long getScopedMaximumBytesAllocated();
+
   /**
    * Sets the event handler to be called on RMM events (e.g.: allocation failure).
    * @param handler event handler to invoke on RMM events or null to clear an existing handler
diff --git a/java/src/main/native/src/RmmJni.cpp b/java/src/main/native/src/RmmJni.cpp
index 2b4c5ae59f5..529345b6bd8 100644
--- a/java/src/main/native/src/RmmJni.cpp
+++ b/java/src/main/native/src/RmmJni.cpp
@@ -19,6 +19,7 @@
 #include <fstream>
 #include <iostream>
 #include <limits>
+#include <mutex>
 
 #include <rmm/mr/device/aligned_resource_adaptor.hpp>
 #include <rmm/mr/device/arena_memory_resource.hpp>
@@ -50,6 +51,12 @@ constexpr char const *RMM_EXCEPTION_CLASS = "ai/rapids/cudf/RmmException";
 class base_tracking_resource_adaptor : public device_memory_resource {
 public:
   virtual std::size_t get_total_allocated() = 0;
+
+  virtual std::size_t get_max_total_allocated() = 0;
+
+  virtual void reset_scoped_max_total_allocated(std::size_t initial_value) = 0;
+
+  virtual std::size_t get_scoped_max_total_allocated() = 0;
 };
 
 /**
@@ -79,11 +86,35 @@ class tracking_resource_adaptor final : public base_tracking_resource_adaptor {
 
   std::size_t get_total_allocated() override { return total_allocated.load(); }
 
+  std::size_t get_max_total_allocated() override { return max_total_allocated; }
+
+  void reset_scoped_max_total_allocated(std::size_t initial_value) override {
+    std::scoped_lock lock(max_total_allocated_mutex);
+    scoped_allocated = 0;
+    scoped_max_total_allocated = initial_value;
+  }
+
+  std::size_t get_scoped_max_total_allocated() override { return scoped_max_total_allocated; }
+
 private:
   Upstream *const resource;
   std::size_t const size_align;
+  // sum of what is currently allocated
   std::atomic_size_t total_allocated{0};
 
+  // the maximum total allocated for the lifetime of this class
+  std::size_t max_total_allocated{0};
+
+  // the sum of what is currently outstanding from the last
+  // `reset_scoped_max_total_allocated` call. This can be negative.
+  std::atomic_long scoped_allocated{0};
+
+  // the maximum total allocated relative to the last
+  // `reset_scoped_max_total_allocated` call.
+  long scoped_max_total_allocated{0};
+
+  std::mutex max_total_allocated_mutex;
+
   void *do_allocate(std::size_t num_bytes, rmm::cuda_stream_view stream) override {
     // adjust size of allocation based on specified size alignment
     num_bytes = (num_bytes + size_align - 1) / size_align * size_align;
@@ -91,6 +122,11 @@ class tracking_resource_adaptor final : public base_tracking_resource_adaptor {
     auto result = resource->allocate(num_bytes, stream);
     if (result) {
       total_allocated += num_bytes;
+      scoped_allocated += num_bytes;
+
+      std::scoped_lock lock(max_total_allocated_mutex);
+      max_total_allocated = std::max(total_allocated.load(), max_total_allocated);
+      scoped_max_total_allocated = std::max(scoped_allocated.load(), scoped_max_total_allocated);
     }
     return result;
   }
@@ -102,6 +138,7 @@ class tracking_resource_adaptor final : public base_tracking_resource_adaptor {
 
     if (p) {
       total_allocated -= size;
+      scoped_allocated -= size;
     }
   }
 
@@ -132,6 +169,26 @@ std::size_t get_total_bytes_allocated() {
   return 0;
 }
 
+std::size_t get_max_total_allocated() {
+  if (Tracking_memory_resource) {
+    return Tracking_memory_resource->get_max_total_allocated();
+  }
+  return 0;
+}
+
+void reset_scoped_max_total_allocated(std::size_t initial_value) {
+  if (Tracking_memory_resource) {
+    return Tracking_memory_resource->reset_scoped_max_total_allocated(initial_value);
+  }
+}
+
+std::size_t get_scoped_max_total_allocated() {
+  if (Tracking_memory_resource) {
+    return Tracking_memory_resource->get_scoped_max_total_allocated();
+  }
+  return 0;
+}
+
 /**
  * @brief An RMM device memory resource adaptor that delegates to the wrapped resource
  * for most operations but will call Java to handle certain situations (e.g.: allocation failure).
@@ -455,6 +512,20 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Rmm_getTotalBytesAllocated(JNIEnv *e
   return get_total_bytes_allocated();
 }
 
+JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Rmm_getMaximumTotalBytesAllocated(JNIEnv *env, jclass) {
+  return get_max_total_allocated();
+}
+
+JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_resetScopedMaximumBytesAllocatedInternal(
+    JNIEnv *env, jclass, long initialValue) {
+  reset_scoped_max_total_allocated(initialValue);
+}
+
+JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Rmm_getScopedMaximumBytesAllocated(JNIEnv *env,
+                                                                               jclass) {
+  return get_scoped_max_total_allocated();
+}
+
 JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Rmm_allocInternal(JNIEnv *env, jclass clazz, jlong size,
                                                               jlong stream) {
   try {
diff --git a/java/src/test/java/ai/rapids/cudf/RmmTest.java b/java/src/test/java/ai/rapids/cudf/RmmTest.java
index 09fbedd8a1c..18ff5f4081e 100644
--- a/java/src/test/java/ai/rapids/cudf/RmmTest.java
+++ b/java/src/test/java/ai/rapids/cudf/RmmTest.java
@@ -65,6 +65,89 @@ public void testTotalAllocated(int rmmAllocMode) {
     assertEquals(0, Rmm.getTotalBytesAllocated());
   }
 
+  @ParameterizedTest
+  @ValueSource(ints = {
+      RmmAllocationMode.CUDA_DEFAULT,
+      RmmAllocationMode.POOL,
+      RmmAllocationMode.ARENA})
+  public void testMaxOutstanding(int rmmAllocMode) {
+    Rmm.initialize(rmmAllocMode, Rmm.logToStderr(), 512 * 1024 * 1024);
+    assertEquals(0, Rmm.getMaximumTotalBytesAllocated());
+    try (DeviceMemoryBuffer ignored = Rmm.alloc(1024)) {
+      assertEquals(1024, Rmm.getMaximumTotalBytesAllocated());
+    }
+    assertEquals(0, Rmm.getTotalBytesAllocated());
+    assertEquals(1024, Rmm.getMaximumTotalBytesAllocated());
+  }
+
+  @ParameterizedTest
+  @ValueSource(ints = {
+      RmmAllocationMode.CUDA_DEFAULT,
+      RmmAllocationMode.POOL,
+      RmmAllocationMode.ARENA})
+  public void testScopedMaxOutstanding(int rmmAllocMode) {
+    Rmm.initialize(rmmAllocMode, Rmm.logToStderr(), 512 * 1024 * 1024);
+    assertEquals(0, Rmm.getMaximumTotalBytesAllocated());
+    try (DeviceMemoryBuffer ignored = Rmm.alloc(1024);
+         DeviceMemoryBuffer ignored2 = Rmm.alloc(1024)) {
+      assertEquals(2048, Rmm.getScopedMaximumBytesAllocated());
+    }
+    assertEquals(0, Rmm.getTotalBytesAllocated());
+    assertEquals(2048, Rmm.getScopedMaximumBytesAllocated());
+
+    Rmm.resetScopedMaximumBytesAllocated();
+    assertEquals(0, Rmm.getScopedMaximumBytesAllocated());
+    assertEquals(2048, Rmm.getMaximumTotalBytesAllocated());
+
+    DeviceMemoryBuffer ignored = Rmm.alloc(1024);
+    ignored.close();
+    assertEquals(1024, Rmm.getScopedMaximumBytesAllocated());
+    assertEquals(2048, Rmm.getMaximumTotalBytesAllocated());
+    assertEquals(0, Rmm.getTotalBytesAllocated());
+
+    // a non-zero value is the new minimum
+    DeviceMemoryBuffer ignored2 = Rmm.alloc(1024);
+    ignored2.close();
+    Rmm.resetScopedMaximumBytesAllocated(10000);
+    assertEquals(10000, Rmm.getScopedMaximumBytesAllocated());
+    assertEquals(2048, Rmm.getMaximumTotalBytesAllocated());
+
+    try(DeviceMemoryBuffer ignored3 = Rmm.alloc(1024)) {
+      Rmm.resetScopedMaximumBytesAllocated(1024);
+      try (DeviceMemoryBuffer ignored4 = Rmm.alloc(20480)) {
+        assertEquals(20480, Rmm.getScopedMaximumBytesAllocated());
+        assertEquals(21504, Rmm.getMaximumTotalBytesAllocated());
+      }
+    }
+  }
+
+  @ParameterizedTest
+  @ValueSource(ints = {
+      RmmAllocationMode.CUDA_DEFAULT,
+      RmmAllocationMode.POOL,
+      RmmAllocationMode.ARENA})
+  public void testScopedMaxOutstandingNegative(int rmmAllocMode) {
+    Rmm.initialize(rmmAllocMode, Rmm.logToStderr(), 512 * 1024 * 1024);
+    assertEquals(0, Rmm.getMaximumTotalBytesAllocated());
+    try (DeviceMemoryBuffer ignored = Rmm.alloc(1024);
+         DeviceMemoryBuffer ignored2 = Rmm.alloc(1024)) {
+      assertEquals(2048, Rmm.getScopedMaximumBytesAllocated());
+      Rmm.resetScopedMaximumBytesAllocated();
+      assertEquals(0, Rmm.getScopedMaximumBytesAllocated());
+    }
+    // because we allocated a net -2048 Bytes since reset
+    assertEquals(0, Rmm.getScopedMaximumBytesAllocated());
+    DeviceMemoryBuffer ignored = Rmm.alloc(1024);
+    ignored.close();
+    assertEquals(0, Rmm.getScopedMaximumBytesAllocated());
+
+    // if we allocate 2KB and then 256B we start seeing a positive local maximum
+    try (DeviceMemoryBuffer ignored2 = Rmm.alloc(2048);
+         DeviceMemoryBuffer ignored3 = Rmm.alloc(256)) {
+      assertEquals(256, Rmm.getScopedMaximumBytesAllocated());
+    }
+  }
+
   @ParameterizedTest
   @ValueSource(ints = {
       RmmAllocationMode.CUDA_DEFAULT,

From 11918aeec9fd2204112398fea19bec8718fab640 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Mon, 24 Oct 2022 17:09:28 -0500
Subject: [PATCH 066/202] Add dtype docs pages and docstrings for `cudf`
 specific dtypes (#11974)

Resolves #11605
This PR:

- [x] Creates docs page entries for `cudf.CategoricalDtype`, `cudf.ListDtype`, `cudf.StructDtype`, `cudf.Decimal32Dtype`, `cudf.Decimal64Dtype`, `cudf.Decimal128Dtype`.
- [x] Updates docstrings in all of the public APIs of the above dtypes.
- [x] Links them in the `data-types.md` page where all supported dtypes are listed as a table.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/11974
---
 .../user_guide/cudf.CategoricalDtype.rst      |  19 ++
 .../user_guide/cudf.Decimal128Dtype.rst       |  20 ++
 .../source/user_guide/cudf.Decimal32Dtype.rst |  20 ++
 .../source/user_guide/cudf.Decimal64Dtype.rst |  20 ++
 .../cudf/source/user_guide/cudf.ListDtype.rst |  19 ++
 .../source/user_guide/cudf.StructDtype.rst    |  18 +
 docs/cudf/source/user_guide/data-types.md     |  24 +-
 python/cudf/cudf/core/dtypes.py               | 312 ++++++++++++++++--
 8 files changed, 413 insertions(+), 39 deletions(-)
 create mode 100644 docs/cudf/source/user_guide/cudf.CategoricalDtype.rst
 create mode 100644 docs/cudf/source/user_guide/cudf.Decimal128Dtype.rst
 create mode 100644 docs/cudf/source/user_guide/cudf.Decimal32Dtype.rst
 create mode 100644 docs/cudf/source/user_guide/cudf.Decimal64Dtype.rst
 create mode 100644 docs/cudf/source/user_guide/cudf.ListDtype.rst
 create mode 100644 docs/cudf/source/user_guide/cudf.StructDtype.rst

diff --git a/docs/cudf/source/user_guide/cudf.CategoricalDtype.rst b/docs/cudf/source/user_guide/cudf.CategoricalDtype.rst
new file mode 100644
index 00000000000..b43de86fc8f
--- /dev/null
+++ b/docs/cudf/source/user_guide/cudf.CategoricalDtype.rst
@@ -0,0 +1,19 @@
+cudf.CategoricalDtype
+=====================
+
+.. currentmodule:: cudf
+
+.. autoclass:: CategoricalDtype
+   :members: categories, ordered, from_pandas, to_pandas
+
+   
+   
+..
+   HACK -- the point here is that we don't want this to appear in the output, but the autosummary should still generate the pages.
+   .. autosummary::
+      :toctree:
+      
+      CategoricalDtype.categories
+      CategoricalDtype.ordered
+      CategoricalDtype.from_pandas
+      CategoricalDtype.to_pandas
\ No newline at end of file
diff --git a/docs/cudf/source/user_guide/cudf.Decimal128Dtype.rst b/docs/cudf/source/user_guide/cudf.Decimal128Dtype.rst
new file mode 100644
index 00000000000..20f6aea1299
--- /dev/null
+++ b/docs/cudf/source/user_guide/cudf.Decimal128Dtype.rst
@@ -0,0 +1,20 @@
+cudf.Decimal128Dtype
+====================
+
+.. currentmodule:: cudf
+
+.. autoclass:: Decimal128Dtype
+   :members: precision, scale, itemsize, to_arrow, from_arrow
+
+   
+   
+..
+   HACK -- the point here is that we don't want this to appear in the output, but the autosummary should still generate the pages.
+   .. autosummary::
+      :toctree:
+      
+      Decimal128Dtype.precision
+      Decimal128Dtype.scale
+      Decimal128Dtype.itemsize
+      Decimal128Dtype.to_arrow
+      Decimal128Dtype.from_arrow
\ No newline at end of file
diff --git a/docs/cudf/source/user_guide/cudf.Decimal32Dtype.rst b/docs/cudf/source/user_guide/cudf.Decimal32Dtype.rst
new file mode 100644
index 00000000000..a92e695b4af
--- /dev/null
+++ b/docs/cudf/source/user_guide/cudf.Decimal32Dtype.rst
@@ -0,0 +1,20 @@
+cudf.Decimal32Dtype
+===================
+
+.. currentmodule:: cudf
+
+.. autoclass:: Decimal32Dtype
+   :members: precision, scale, itemsize, to_arrow, from_arrow
+
+   
+   
+..
+   HACK -- the point here is that we don't want this to appear in the output, but the autosummary should still generate the pages.
+   .. autosummary::
+      :toctree:
+      
+      Decimal32Dtype.precision
+      Decimal32Dtype.scale
+      Decimal32Dtype.itemsize
+      Decimal32Dtype.to_arrow
+      Decimal32Dtype.from_arrow
\ No newline at end of file
diff --git a/docs/cudf/source/user_guide/cudf.Decimal64Dtype.rst b/docs/cudf/source/user_guide/cudf.Decimal64Dtype.rst
new file mode 100644
index 00000000000..3982ec7ad80
--- /dev/null
+++ b/docs/cudf/source/user_guide/cudf.Decimal64Dtype.rst
@@ -0,0 +1,20 @@
+cudf.Decimal64Dtype
+===================
+
+.. currentmodule:: cudf
+
+.. autoclass:: Decimal64Dtype
+   :members: precision, scale, itemsize, to_arrow, from_arrow
+
+   
+   
+..
+   HACK -- the point here is that we don't want this to appear in the output, but the autosummary should still generate the pages.
+   .. autosummary::
+      :toctree:
+      
+      Decimal64Dtype.precision
+      Decimal64Dtype.scale
+      Decimal64Dtype.itemsize
+      Decimal64Dtype.to_arrow
+      Decimal64Dtype.from_arrow
\ No newline at end of file
diff --git a/docs/cudf/source/user_guide/cudf.ListDtype.rst b/docs/cudf/source/user_guide/cudf.ListDtype.rst
new file mode 100644
index 00000000000..6f37317c898
--- /dev/null
+++ b/docs/cudf/source/user_guide/cudf.ListDtype.rst
@@ -0,0 +1,19 @@
+cudf.ListDtype
+==============
+
+.. currentmodule:: cudf
+
+.. autoclass:: ListDtype
+   :members: element_type, leaf_type, from_arrow, to_arrow
+
+   
+   
+..
+   HACK -- the point here is that we don't want this to appear in the output, but the autosummary should still generate the pages.
+   .. autosummary::
+      :toctree:
+      
+      ListDtype.element_type
+      ListDtype.leaf_type
+      ListDtype.from_arrow
+      ListDtype.to_arrow
\ No newline at end of file
diff --git a/docs/cudf/source/user_guide/cudf.StructDtype.rst b/docs/cudf/source/user_guide/cudf.StructDtype.rst
new file mode 100644
index 00000000000..68fa71b8231
--- /dev/null
+++ b/docs/cudf/source/user_guide/cudf.StructDtype.rst
@@ -0,0 +1,18 @@
+cudf.StructDtype
+================
+
+.. currentmodule:: cudf
+
+.. autoclass:: StructDtype
+   :members: fields, from_arrow, to_arrow
+
+   
+   
+..
+   HACK -- the point here is that we don't want this to appear in the output, but the autosummary should still generate the pages.
+   .. autosummary::
+      :toctree:
+      
+      StructDtype.fields
+      StructDtype.from_arrow
+      StructDtype.to_arrow
\ No newline at end of file
diff --git a/docs/cudf/source/user_guide/data-types.md b/docs/cudf/source/user_guide/data-types.md
index 664540b4592..85152930a73 100644
--- a/docs/cudf/source/user_guide/data-types.md
+++ b/docs/cudf/source/user_guide/data-types.md
@@ -9,18 +9,18 @@ All data types in cuDF are [nullable](missing-data).
 
 <div class="special-table">
 
-| Kind of data         | Data type(s)                                                                    |
-|----------------------|---------------------------------------------------------------------------------|
-| Signed integer       | `'int8'`, `'int16'`, `'int32'`, `'int64'`                                       |
-| Unsigned integer     | `'uint32'`, `'uint64'`                                                          |
-| Floating-point       | `'float32'`, `'float64'`                                                        |
-| Datetime             | `'datetime64[s]'`, `'datetime64[ms]'`, `'datetime64['us']`, `'datetime64[ns]'`  |
-| Timedelta (duration) | `'timedelta[s]'`, `'timedelta[ms]'`, `'timedelta['us']`, `'timedelta[ns]'`      |
-| Category             | `cudf.CategoricalDtype`                                                         |
-| String               | `'object'` or `'string'`                                                        |
-| Decimal              | `cudf.Decimal32Dtype`, `cudf.Decimal64Dtype`, `cudf.Decimal64Dtype`             |
-| List                 | `cudf.ListDtype`                                                                |
-| Struct               | `cudf.StructDtype`                                                              |
+| Kind of data         | Data type(s)                                                                                      |
+|----------------------|---------------------------------------------------------------------------------------------------|
+| Signed integer       | `'int8'`, `'int16'`, `'int32'`, `'int64'`                                                         |
+| Unsigned integer     | `'uint32'`, `'uint64'`                                                                            |
+| Floating-point       | `'float32'`, `'float64'`                                                                          |
+| Datetime             | `'datetime64[s]'`, `'datetime64[ms]'`, `'datetime64[us]'`, `'datetime64[ns]'`                     |
+| Timedelta (duration) | `'timedelta64[s]'`, `'timedelta64[ms]'`, `'timedelta64[us]'`, `'timedelta64[ns]'`                 |
+| Category             | {py:func}`cudf.CategoricalDtype`                                                                  |
+| String               | `'object'` or `'string'`                                                                          |
+| Decimal              | {py:func}`cudf.Decimal32Dtype`, {py:func}`cudf.Decimal64Dtype`, {py:func}`cudf.Decimal128Dtype`   |
+| List                 | {py:func}`cudf.ListDtype`                                                                         |
+| Struct               | {py:func}`cudf.StructDtype`                                                                       |
 
 </div>
 
diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py
index 5cff057ce7c..84f528549e9 100644
--- a/python/cudf/cudf/core/dtypes.py
+++ b/python/cudf/cudf/core/dtypes.py
@@ -3,6 +3,7 @@
 import decimal
 import operator
 import pickle
+import textwrap
 from typing import Any, Callable, Dict, List, Tuple, Type, Union
 
 import numpy as np
@@ -20,6 +21,7 @@
 from cudf.core._compat import PANDAS_GE_130, PANDAS_GE_150
 from cudf.core.abc import Serializable
 from cudf.core.buffer import DeviceBufferLike
+from cudf.utils.docutils import doc_apply
 
 if PANDAS_GE_150:
     from pandas.core.arrays.arrow.extension_types import ArrowIntervalType
@@ -152,14 +154,22 @@ class CategoricalDtype(_BaseDtype):
     Categories (2, object): ['b' < 'a']
     """
 
-    ordered: bool
-
     def __init__(self, categories=None, ordered: bool = False) -> None:
         self._categories = self._init_categories(categories)
-        self.ordered = ordered
+        self._ordered = ordered
 
     @property
     def categories(self) -> "cudf.core.index.BaseIndex":
+        """
+        An ``Index`` containing the unique categories allowed.
+
+        Examples
+        --------
+        >>> import cudf
+        >>> dtype = cudf.CategoricalDtype(categories=['b', 'a'], ordered=True)
+        >>> dtype.categories
+        StringIndex(['b' 'a'], dtype='object')
+        """
         if self._categories is None:
             return cudf.core.index.as_index(
                 cudf.core.column.column_empty(0, dtype="object", masked=False)
@@ -178,13 +188,50 @@ def name(self):
     def str(self):
         return "|O08"
 
+    @property
+    def ordered(self) -> bool:
+        """
+        Whether the categories have an ordered relationship.
+        """
+        return self._ordered
+
+    @ordered.setter
+    def ordered(self, value) -> None:
+        self._ordered = value
+
     @classmethod
     def from_pandas(cls, dtype: pd.CategoricalDtype) -> "CategoricalDtype":
+        """
+        Convert a ``pandas.CategoricalDtype`` to ``cudf.CategoricalDtype``
+
+        Examples
+        --------
+        >>> import cudf
+        >>> import pandas as pd
+        >>> pd_dtype = pd.CategoricalDtype(categories=['b', 'a'], ordered=True)
+        >>> pd_dtype
+        CategoricalDtype(categories=['b', 'a'], ordered=True)
+        >>> cudf_dtype = cudf.CategoricalDtype.from_pandas(pd_dtype)
+        >>> cudf_dtype
+        CategoricalDtype(categories=['b', 'a'], ordered=True)
+        """
         return CategoricalDtype(
             categories=dtype.categories, ordered=dtype.ordered
         )
 
     def to_pandas(self) -> pd.CategoricalDtype:
+        """
+        Convert a ``cudf.CategoricalDtype`` to ``pandas.CategoricalDtype``
+
+        Examples
+        --------
+        >>> import cudf
+        >>> dtype = cudf.CategoricalDtype(categories=['b', 'a'], ordered=True)
+        >>> dtype
+        CategoricalDtype(categories=['b', 'a'], ordered=True)
+        >>> dtype.to_pandas()
+        CategoricalDtype(categories=['b', 'a'], ordered=True)
+        """
         if self._categories is None:
             categories = None
         else:
@@ -257,8 +304,33 @@ def deserialize(cls, header, frames):
         )
         return klass(categories=categories, ordered=ordered)
 
+    def __repr__(self):
+        return self.to_pandas().__repr__()
+
 
 class ListDtype(_BaseDtype):
+    """
+    Type to represent list data.
+
+    Parameters
+    ----------
+    element_type : object
+        A dtype which represents the element type of the list.
+
+    Examples
+    --------
+    >>> import cudf
+    >>> list_dtype = cudf.ListDtype("int32")
+    >>> list_dtype
+    ListDtype(int32)
+
+    A nested list dtype can be created by:
+
+    >>> nested_list_dtype = cudf.ListDtype(list_dtype)
+    >>> nested_list_dtype
+    ListDtype(ListDtype(int32))
+    """
+
     _typ: pa.ListType
     name: str = "list"
 
@@ -273,6 +345,26 @@ def __init__(self, element_type: Any) -> None:
 
     @property
     def element_type(self) -> Dtype:
+        """
+        Returns the element type of the ``ListDtype``.
+
+        Returns
+        -------
+        Dtype
+
+        Examples
+        --------
+        >>> import cudf
+        >>> deep_nested_type = cudf.ListDtype(cudf.ListDtype(cudf.ListDtype("float32")))
+        >>> deep_nested_type
+        ListDtype(ListDtype(ListDtype(float32)))
+        >>> deep_nested_type.element_type
+        ListDtype(ListDtype(float32))
+        >>> deep_nested_type.element_type.element_type
+        ListDtype(float32)
+        >>> deep_nested_type.element_type.element_type.element_type
+        'float32'
+        """  # noqa: E501
         if isinstance(self._typ.value_type, pa.ListType):
             return ListDtype.from_arrow(self._typ.value_type)
         elif isinstance(self._typ.value_type, pa.StructType):
@@ -282,6 +374,18 @@ def element_type(self) -> Dtype:
 
     @property
     def leaf_type(self):
+        """
+        Returns the type of the leaf values.
+
+        Examples
+        --------
+        >>> import cudf
+        >>> deep_nested_type = cudf.ListDtype(cudf.ListDtype(cudf.ListDtype("float32")))
+        >>> deep_nested_type
+        ListDtype(ListDtype(ListDtype(float32)))
+        >>> deep_nested_type.leaf_type
+        'float32'
+        """  # noqa: E501
         if isinstance(self.element_type, ListDtype):
             return self.element_type.leaf_type
         else:
@@ -295,11 +399,47 @@ def type(self):
 
     @classmethod
     def from_arrow(cls, typ):
+        """
+        Creates a ``ListDtype`` from ``pyarrow.ListType``.
+
+        Parameters
+        ----------
+        typ : pyarrow.ListType
+            A ``pyarrow.ListType`` that has to be converted to
+            ``ListDtype``.
+
+        Returns
+        -------
+        obj : ``ListDtype``
+
+        Examples
+        --------
+        >>> import cudf
+        >>> import pyarrow as pa
+        >>> arrow_type = pa.infer_type([[1]])
+        >>> arrow_type
+        ListType(list<item: int64>)
+        >>> list_dtype = cudf.ListDtype.from_arrow(arrow_type)
+        >>> list_dtype
+        ListDtype(int64)
+        """
         obj = object.__new__(cls)
         obj._typ = typ
         return obj
 
     def to_arrow(self):
+        """
+        Convert to a ``pyarrow.ListType``
+
+        Examples
+        --------
+        >>> import cudf
+        >>> list_dtype = cudf.ListDtype(cudf.ListDtype("float32"))
+        >>> list_dtype
+        ListDtype(ListDtype(float32))
+        >>> list_dtype.to_arrow()
+        ListType(list<item: list<item: float>>)
+        """
         return self._typ
 
     def __eq__(self, other):
@@ -345,9 +485,27 @@ def deserialize(cls, header: dict, frames: list):
 
 class StructDtype(_BaseDtype):
     """
+    Type to represent struct data.
+
+    Parameters
+    ----------
     fields : dict
-        A mapping of field names to dtypes
-    """
+        A mapping of field names to dtypes, the dtypes can themselves
+        be of ``StructDtype`` too.
+
+    Examples
+    --------
+    >>> import cudf
+    >>> struct_dtype = cudf.StructDtype({"a": "int64", "b": "string"})
+    >>> struct_dtype
+    StructDtype({'a': dtype('int64'), 'b': dtype('O')})
+
+    A nested ``StructDtype`` can also be constructed in the following way:
+
+    >>> nested_struct_dtype = cudf.StructDtype({"dict_data": struct_dtype, "c": "uint8"})
+    >>> nested_struct_dtype
+    StructDtype({'dict_data': StructDtype({'a': dtype('int64'), 'b': dtype('O')}), 'c': dtype('uint8')})
+    """  # noqa: E501
 
     name = "struct"
 
@@ -360,6 +518,18 @@ def __init__(self, fields):
 
     @property
     def fields(self):
+        """
+        Returns a dict mapping each field name to its dtype.
+
+        Examples
+        --------
+        >>> import cudf
+        >>> struct_dtype = cudf.StructDtype({"a": "int64", "b": "string"})
+        >>> struct_dtype
+        StructDtype({'a': dtype('int64'), 'b': dtype('O')})
+        >>> struct_dtype.fields
+        {'a': dtype('int64'), 'b': dtype('O')}
+        """
         return {
             field.name: cudf.utils.dtypes.cudf_dtype_from_pa_type(field.type)
             for field in self._typ
@@ -373,11 +543,36 @@ def type(self):
 
     @classmethod
     def from_arrow(cls, typ):
+        """
+        Convert a ``pyarrow.StructType`` to ``StructDtype``.
+
+        Examples
+        --------
+        >>> import cudf
+        >>> import pyarrow as pa
+        >>> pa_struct_type = pa.struct({'x': pa.int32(), 'y': pa.string()})
+        >>> pa_struct_type
+        StructType(struct<x: int32, y: string>)
+        >>> cudf.StructDtype.from_arrow(pa_struct_type)
+        StructDtype({'x': dtype('int32'), 'y': dtype('O')})
+        """
         obj = object.__new__(cls)
         obj._typ = typ
         return obj
 
     def to_arrow(self):
+        """
+        Convert a ``StructDtype`` to a ``pyarrow.StructType``.
+
+        Examples
+        --------
+        >>> import cudf
+        >>> struct_type = cudf.StructDtype({"x": "int32", "y": "string"})
+        >>> struct_type
+        StructDtype({'x': dtype('int32'), 'y': dtype('O')})
+        >>> struct_type.to_arrow()
+        StructType(struct<x: int32, y: string>)
+        """
         return self._typ
 
     def __eq__(self, other):
@@ -433,30 +628,42 @@ def deserialize(cls, header: dict, frames: list):
         return cls(fields)
 
 
-class DecimalDtype(_BaseDtype):
-    """
-    Parameters
-    ----------
-    precision : int
-        The total number of digits in each value of this dtype
-    scale : int, optional
-        The scale of the dtype. See Notes below.
-
-    Notes
-    -----
-        When the scale is positive:
-            - numbers with fractional parts (e.g., 0.0042) can be represented
-            - the scale is the total number of digits to the right of the
-            decimal point
-        When the scale is negative:
-            - only multiples of powers of 10 (including 10**0) can be
-            represented (e.g., 1729, 4200, 1000000)
-            - the scale represents the number of trailing zeros in the value.
-        For example, 42 is representable with precision=2 and scale=0.
-        13.0051 is representable with precision=6 and scale=4,
-        and *not* representable with precision<6 or scale<4.
+decimal_dtype_template = textwrap.dedent(
     """
+        Type to represent ``decimal{size}`` data.
+
+        Parameters
+        ----------
+        precision : int
+            The total number of digits in each value of this dtype
+        scale : int, optional
+            The scale of the dtype. See Notes below.
+
+        Notes
+        -----
+            When the scale is positive:
+                - numbers with fractional parts (e.g., 0.0042) can be represented
+                - the scale is the total number of digits to the right of the
+                  decimal point
+            When the scale is negative:
+                - only multiples of powers of 10 (including 10**0) can be
+                  represented (e.g., 1729, 4200, 1000000)
+                - the scale represents the number of trailing zeros in the value.
+            For example, 42 is representable with precision=2 and scale=0.
+            13.0051 is representable with precision=6 and scale=4,
+            and *not* representable with precision<6 or scale<4.
+
+        Examples
+        --------
+        >>> import cudf
+        >>> decimal{size}_dtype = cudf.Decimal{size}Dtype(precision=9, scale=2)
+        >>> decimal{size}_dtype
+        Decimal{size}Dtype(precision=9, scale=2)
+    """  # noqa: E501
+)
+
 
+class DecimalDtype(_BaseDtype):
     _metadata = ("precision", "scale")
 
     def __init__(self, precision, scale=0):
@@ -469,6 +676,9 @@ def str(self):
 
     @property
     def precision(self):
+        """
+        The decimal precision, in number of decimal digits (an integer).
+        """
         return self._typ.precision
 
     @precision.setter
@@ -478,10 +688,16 @@ def precision(self, value):
 
     @property
     def scale(self):
+        """
+        The decimal scale (an integer).
+        """
         return self._typ.scale
 
     @property
     def itemsize(self):
+        """
+        Length of one column element in bytes.
+        """
         return self.ITEMSIZE
 
     @property
@@ -490,10 +706,37 @@ def type(self):
         return decimal.Decimal
 
     def to_arrow(self):
+        """
+        Return the equivalent ``pyarrow`` dtype.
+        """
         return self._typ
 
     @classmethod
     def from_arrow(cls, typ):
+        """
+        Construct a cudf decimal dtype from a ``pyarrow`` dtype
+
+        Examples
+        --------
+        >>> import cudf
+        >>> import pyarrow as pa
+        >>> pa_type = pa.decimal128(precision=9, scale=2)
+
+        Constructing a ``Decimal32Dtype``:
+
+        >>> cudf.Decimal32Dtype.from_arrow(pa_type)
+        Decimal32Dtype(precision=9, scale=2)
+
+        Constructing a ``Decimal64Dtype``:
+
+        >>> cudf.Decimal64Dtype.from_arrow(pa_type)
+        Decimal64Dtype(precision=9, scale=2)
+
+        Constructing a ``Decimal128Dtype``:
+
+        >>> cudf.Decimal128Dtype.from_arrow(pa_type)
+        Decimal128Dtype(precision=9, scale=2)
+        """
         return cls(typ.precision, typ.scale)
 
     def __repr__(self):
@@ -551,18 +794,33 @@ def __hash__(self):
         return hash(self._typ)
 
 
+@doc_apply(
+    decimal_dtype_template.format(
+        size="32",
+    )
+)
 class Decimal32Dtype(DecimalDtype):
     name = "decimal32"
     MAX_PRECISION = np.floor(np.log10(np.iinfo("int32").max))
     ITEMSIZE = 4
 
 
+@doc_apply(
+    decimal_dtype_template.format(
+        size="64",
+    )
+)
 class Decimal64Dtype(DecimalDtype):
     name = "decimal64"
     MAX_PRECISION = np.floor(np.log10(np.iinfo("int64").max))
     ITEMSIZE = 8
 
 
+@doc_apply(
+    decimal_dtype_template.format(
+        size="128",
+    )
+)
 class Decimal128Dtype(DecimalDtype):
     name = "decimal128"
     MAX_PRECISION = 38

From 2ee41d0b35ffda2dd6f0a698499a94afea176865 Mon Sep 17 00:00:00 2001
From: Vukasin Milovanovic <vmilovanovic@nvidia.com>
Date: Tue, 25 Oct 2022 01:34:31 -0700
Subject: [PATCH 067/202] Replace most of preprocessor usage in nvcomp adapter
 with `constexpr` (#11980)

C++17's "constexpr if" provides the same functionality as `#if` directive, as used in the nvcomp adapter.
This PR replaces macros with `constexpr` variables and uses them as conditions in "constexpr if" statements.

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Tobias Ribizel (https://github.com/upsj)

URL: https://github.com/rapidsai/cudf/pull/11980
---
 cpp/src/io/comp/nvcomp_adapter.cpp | 258 ++++++++++++++---------------
 1 file changed, 123 insertions(+), 135 deletions(-)

diff --git a/cpp/src/io/comp/nvcomp_adapter.cpp b/cpp/src/io/comp/nvcomp_adapter.cpp
index fd794b2e66c..f66f2ec0c0c 100644
--- a/cpp/src/io/comp/nvcomp_adapter.cpp
+++ b/cpp/src/io/comp/nvcomp_adapter.cpp
@@ -31,46 +31,30 @@
 #include NVCOMP_ZSTD_HEADER
 #endif
 
-#if NVCOMP_MAJOR_VERSION > 2 or (NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION >= 3)
-#define NVCOMP_HAS_ZSTD_DECOMP 1
-#else
-#define NVCOMP_HAS_ZSTD_DECOMP 0
-#endif
+constexpr bool NVCOMP_HAS_ZSTD_DECOMP = NVCOMP_MAJOR_VERSION > 2 or
+                                        (NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION >= 3);
 
-#if NVCOMP_MAJOR_VERSION > 2 or (NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION >= 4)
-#define NVCOMP_HAS_ZSTD_COMP 1
-#else
-#define NVCOMP_HAS_ZSTD_COMP 0
-#endif
+constexpr bool NVCOMP_HAS_ZSTD_COMP = NVCOMP_MAJOR_VERSION > 2 or
+                                      (NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION >= 4);
 
-#if NVCOMP_MAJOR_VERSION > 2 or (NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION >= 3)
-#define NVCOMP_HAS_DEFLATE 1
-#else
-#define NVCOMP_HAS_DEFLATE 0
-#endif
+constexpr bool NVCOMP_HAS_DEFLATE = NVCOMP_MAJOR_VERSION > 2 or
+                                    (NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION >= 3);
 
-#if NVCOMP_MAJOR_VERSION > 2 or (NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION > 3) or \
-  (NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION == 3 and NVCOMP_PATCH_VERSION >= 1)
-#define NVCOMP_HAS_TEMPSIZE_EX 1
-#else
-#define NVCOMP_HAS_TEMPSIZE_EX 0
-#endif
+constexpr bool NVCOMP_HAS_TEMPSIZE_EX = NVCOMP_MAJOR_VERSION > 2 or
+                                        (NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION > 3) or
+                                        (NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION == 3 and
+                                         NVCOMP_PATCH_VERSION >= 1);
 
 // ZSTD is stable for nvcomp 2.3.2 or newer
-#if NVCOMP_MAJOR_VERSION > 2 or (NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION > 3) or \
-  (NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION == 3 and NVCOMP_PATCH_VERSION >= 2)
-#define NVCOMP_ZSTD_IS_STABLE 1
-#else
-#define NVCOMP_ZSTD_IS_STABLE 0
-#endif
+constexpr bool NVCOMP_ZSTD_IS_STABLE = NVCOMP_MAJOR_VERSION > 2 or
+                                       (NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION > 3) or
+                                       (NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION == 3 and
+                                        NVCOMP_PATCH_VERSION >= 2);
 
 // Issue https://github.com/NVIDIA/spark-rapids/issues/6614 impacts nvCOMP 2.4.0 ZSTD decompression
 // on compute 6.x
-#if NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION == 4 and NVCOMP_PATCH_VERSION == 0
-#define NVCOMP_ZSTD_IS_DISABLED_ON_PASCAL 1
-#else
-#define NVCOMP_ZSTD_IS_DISABLED_ON_PASCAL 0
-#endif
+constexpr bool NVCOMP_ZSTD_IS_DISABLED_ON_PASCAL =
+  NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION == 4 and NVCOMP_PATCH_VERSION == 0;
 
 namespace cudf::io::nvcomp {
 
@@ -79,20 +63,20 @@ template <typename... Args>
 std::optional<nvcompStatus_t> batched_decompress_get_temp_size_ex(compression_type compression,
                                                                   Args&&... args)
 {
-#if NVCOMP_HAS_TEMPSIZE_EX
-  switch (compression) {
-    case compression_type::SNAPPY:
-      return nvcompBatchedSnappyDecompressGetTempSizeEx(std::forward<Args>(args)...);
-    case compression_type::ZSTD:
-#if NVCOMP_HAS_ZSTD_DECOMP
-      return nvcompBatchedZstdDecompressGetTempSizeEx(std::forward<Args>(args)...);
-#else
-      return std::nullopt;
-#endif
-    case compression_type::DEFLATE: [[fallthrough]];
-    default: return std::nullopt;
+  if constexpr (NVCOMP_HAS_TEMPSIZE_EX) {
+    switch (compression) {
+      case compression_type::SNAPPY:
+        return nvcompBatchedSnappyDecompressGetTempSizeEx(std::forward<Args>(args)...);
+      case compression_type::ZSTD:
+        if constexpr (NVCOMP_HAS_ZSTD_DECOMP) {
+          return nvcompBatchedZstdDecompressGetTempSizeEx(std::forward<Args>(args)...);
+        } else {
+          return std::nullopt;
+        }
+      case compression_type::DEFLATE: [[fallthrough]];
+      default: return std::nullopt;
+    }
   }
-#endif
   return std::nullopt;
 }
 
@@ -104,17 +88,17 @@ auto batched_decompress_get_temp_size(compression_type compression, Args&&... ar
     case compression_type::SNAPPY:
       return nvcompBatchedSnappyDecompressGetTempSize(std::forward<Args>(args)...);
     case compression_type::ZSTD:
-#if NVCOMP_HAS_ZSTD_DECOMP
-      return nvcompBatchedZstdDecompressGetTempSize(std::forward<Args>(args)...);
-#else
-      CUDF_FAIL("Unsupported compression type");
-#endif
+      if constexpr (NVCOMP_HAS_ZSTD_DECOMP) {
+        return nvcompBatchedZstdDecompressGetTempSize(std::forward<Args>(args)...);
+      } else {
+        CUDF_FAIL("Unsupported compression type");
+      }
     case compression_type::DEFLATE:
-#if NVCOMP_HAS_DEFLATE
-      return nvcompBatchedDeflateDecompressGetTempSize(std::forward<Args>(args)...);
-#else
-      CUDF_FAIL("Unsupported compression type");
-#endif
+      if constexpr (NVCOMP_HAS_DEFLATE) {
+        return nvcompBatchedDeflateDecompressGetTempSize(std::forward<Args>(args)...);
+      } else {
+        CUDF_FAIL("Unsupported compression type");
+      }
     default: CUDF_FAIL("Unsupported compression type");
   }
 }
@@ -127,17 +111,18 @@ auto batched_decompress_async(compression_type compression, Args&&... args)
     case compression_type::SNAPPY:
       return nvcompBatchedSnappyDecompressAsync(std::forward<Args>(args)...);
     case compression_type::ZSTD:
-#if NVCOMP_HAS_ZSTD_DECOMP
-      return nvcompBatchedZstdDecompressAsync(std::forward<Args>(args)...);
-#else
-      CUDF_FAIL("Unsupported compression type");
-#endif
+      if constexpr (NVCOMP_HAS_ZSTD_DECOMP) {
+        return nvcompBatchedZstdDecompressAsync(std::forward<Args>(args)...);
+      } else {
+        CUDF_FAIL("Unsupported compression type");
+      }
+
     case compression_type::DEFLATE:
-#if NVCOMP_HAS_DEFLATE
-      return nvcompBatchedDeflateDecompressAsync(std::forward<Args>(args)...);
-#else
-      CUDF_FAIL("Unsupported compression type");
-#endif
+      if constexpr (NVCOMP_HAS_DEFLATE) {
+        return nvcompBatchedDeflateDecompressAsync(std::forward<Args>(args)...);
+      } else {
+        CUDF_FAIL("Unsupported compression type");
+      }
     default: CUDF_FAIL("Unsupported compression type");
   }
 }
@@ -170,13 +155,13 @@ void check_is_zstd_enabled()
                "Zstandard compression is experimental, you can enable it through "
                "`LIBCUDF_NVCOMP_POLICY` environment variable.");
 
-#if NVCOMP_ZSTD_IS_DISABLED_ON_PASCAL
-  int device;
-  int cc_major;
-  CUDF_CUDA_TRY(cudaGetDevice(&device));
-  CUDF_CUDA_TRY(cudaDeviceGetAttribute(&cc_major, cudaDevAttrComputeCapabilityMajor, device));
-  CUDF_EXPECTS(cc_major != 6, "Zstandard decompression is disabled on Pascal GPUs");
-#endif
+  if constexpr (NVCOMP_ZSTD_IS_DISABLED_ON_PASCAL) {
+    int device;
+    int cc_major;
+    CUDF_CUDA_TRY(cudaGetDevice(&device));
+    CUDF_CUDA_TRY(cudaDeviceGetAttribute(&cc_major, cudaDevAttrComputeCapabilityMajor, device));
+    CUDF_EXPECTS(cc_major != 6, "Zstandard decompression is disabled on Pascal GPUs");
+  }
 }
 
 void batched_decompress(compression_type compression,
@@ -228,21 +213,22 @@ auto batched_compress_temp_size(compression_type compression,
         batch_size, max_uncompressed_chunk_bytes, nvcompBatchedSnappyDefaultOpts, &temp_size);
       break;
     case compression_type::DEFLATE:
-#if NVCOMP_HAS_DEFLATE
-      nvcomp_status = nvcompBatchedDeflateCompressGetTempSize(
-        batch_size, max_uncompressed_chunk_bytes, nvcompBatchedDeflateDefaultOpts, &temp_size);
-      break;
-#else
-      CUDF_FAIL("Unsupported compression type");
-#endif
+      if constexpr (NVCOMP_HAS_DEFLATE) {
+        nvcomp_status = nvcompBatchedDeflateCompressGetTempSize(
+          batch_size, max_uncompressed_chunk_bytes, nvcompBatchedDeflateDefaultOpts, &temp_size);
+        break;
+      } else {
+        CUDF_FAIL("Unsupported compression type");
+      }
     case compression_type::ZSTD:
-#if NVCOMP_HAS_ZSTD_COMP
-      nvcomp_status = nvcompBatchedZstdCompressGetTempSize(
-        batch_size, max_uncompressed_chunk_bytes, nvcompBatchedZstdDefaultOpts, &temp_size);
-      break;
-#else
-      CUDF_FAIL("Unsupported compression type");
-#endif
+      if constexpr (NVCOMP_HAS_ZSTD_COMP) {
+        nvcomp_status = nvcompBatchedZstdCompressGetTempSize(
+          batch_size, max_uncompressed_chunk_bytes, nvcompBatchedZstdDefaultOpts, &temp_size);
+        break;
+      } else {
+        CUDF_FAIL("Unsupported compression type");
+      }
+
     default: CUDF_FAIL("Unsupported compression type");
   }
 
@@ -266,21 +252,21 @@ size_t compress_max_output_chunk_size(compression_type compression,
         capped_uncomp_bytes, nvcompBatchedSnappyDefaultOpts, &max_comp_chunk_size);
       break;
     case compression_type::DEFLATE:
-#if NVCOMP_HAS_DEFLATE
-      status = nvcompBatchedDeflateCompressGetMaxOutputChunkSize(
-        capped_uncomp_bytes, nvcompBatchedDeflateDefaultOpts, &max_comp_chunk_size);
-      break;
-#else
-      CUDF_FAIL("Unsupported compression type");
-#endif
+      if constexpr (NVCOMP_HAS_DEFLATE) {
+        status = nvcompBatchedDeflateCompressGetMaxOutputChunkSize(
+          capped_uncomp_bytes, nvcompBatchedDeflateDefaultOpts, &max_comp_chunk_size);
+        break;
+      } else {
+        CUDF_FAIL("Unsupported compression type");
+      }
     case compression_type::ZSTD:
-#if NVCOMP_HAS_ZSTD_COMP
-      status = nvcompBatchedZstdCompressGetMaxOutputChunkSize(
-        capped_uncomp_bytes, nvcompBatchedZstdDefaultOpts, &max_comp_chunk_size);
-      break;
-#else
-      CUDF_FAIL("Unsupported compression type");
-#endif
+      if constexpr (NVCOMP_HAS_ZSTD_COMP) {
+        status = nvcompBatchedZstdCompressGetMaxOutputChunkSize(
+          capped_uncomp_bytes, nvcompBatchedZstdDefaultOpts, &max_comp_chunk_size);
+        break;
+      } else {
+        CUDF_FAIL("Unsupported compression type");
+      }
     default: CUDF_FAIL("Unsupported compression type");
   }
 
@@ -316,37 +302,39 @@ static void batched_compress_async(compression_type compression,
                                                        stream.value());
       break;
     case compression_type::DEFLATE:
-#if NVCOMP_HAS_DEFLATE
-      nvcomp_status = nvcompBatchedDeflateCompressAsync(device_uncompressed_ptrs,
-                                                        device_uncompressed_bytes,
-                                                        max_uncompressed_chunk_bytes,
-                                                        batch_size,
-                                                        device_temp_ptr,
-                                                        temp_bytes,
-                                                        device_compressed_ptrs,
-                                                        device_compressed_bytes,
-                                                        nvcompBatchedDeflateDefaultOpts,
-                                                        stream.value());
-      break;
-#else
-      CUDF_FAIL("Unsupported compression type");
-#endif
+      if constexpr (NVCOMP_HAS_DEFLATE) {
+        nvcomp_status = nvcompBatchedDeflateCompressAsync(device_uncompressed_ptrs,
+                                                          device_uncompressed_bytes,
+                                                          max_uncompressed_chunk_bytes,
+                                                          batch_size,
+                                                          device_temp_ptr,
+                                                          temp_bytes,
+                                                          device_compressed_ptrs,
+                                                          device_compressed_bytes,
+                                                          nvcompBatchedDeflateDefaultOpts,
+                                                          stream.value());
+        break;
+      } else {
+        CUDF_FAIL("Unsupported compression type");
+      }
+
     case compression_type::ZSTD:
-#if NVCOMP_HAS_ZSTD_COMP
-      nvcomp_status = nvcompBatchedZstdCompressAsync(device_uncompressed_ptrs,
-                                                     device_uncompressed_bytes,
-                                                     max_uncompressed_chunk_bytes,
-                                                     batch_size,
-                                                     device_temp_ptr,
-                                                     temp_bytes,
-                                                     device_compressed_ptrs,
-                                                     device_compressed_bytes,
-                                                     nvcompBatchedZstdDefaultOpts,
-                                                     stream.value());
-      break;
-#else
-      CUDF_FAIL("Unsupported compression type");
-#endif
+      if constexpr (NVCOMP_HAS_ZSTD_COMP) {
+        nvcomp_status = nvcompBatchedZstdCompressAsync(device_uncompressed_ptrs,
+                                                       device_uncompressed_bytes,
+                                                       max_uncompressed_chunk_bytes,
+                                                       batch_size,
+                                                       device_temp_ptr,
+                                                       temp_bytes,
+                                                       device_compressed_ptrs,
+                                                       device_compressed_bytes,
+                                                       nvcompBatchedZstdDefaultOpts,
+                                                       stream.value());
+        break;
+      } else {
+        CUDF_FAIL("Unsupported compression type");
+      }
+
     default: CUDF_FAIL("Unsupported compression type");
   }
   CUDF_EXPECTS(nvcomp_status == nvcompStatus_t::nvcompSuccess, "Error in compression");
@@ -430,11 +418,11 @@ std::optional<size_t> compress_max_allowed_chunk_size(compression_type compressi
     case compression_type::DEFLATE: return 64 * 1024;
     case compression_type::SNAPPY: return std::nullopt;
     case compression_type::ZSTD:
-#if NVCOMP_HAS_ZSTD_COMP
-      return nvcompZstdCompressionMaxAllowedChunkSize;
-#else
-      CUDF_FAIL("Unsupported compression type");
-#endif
+      if constexpr (NVCOMP_HAS_ZSTD_COMP) {
+        return nvcompZstdCompressionMaxAllowedChunkSize;
+      } else {
+        CUDF_FAIL("Unsupported compression type");
+      }
     default: return std::nullopt;
   }
 }

From dc5924c009b685248dca8491d96189d0ad374ca0 Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Tue, 25 Oct 2022 10:31:13 -0400
Subject: [PATCH 068/202] Add pool memory resource to libcudf basic example
 (#11966)

Adds the pool memory resource to the libcudf basic example.
Also adds README.md to the strings example and makes some minor fixes to the documentation.

Closes #11870

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Jake Hemstad (https://github.com/jrhemstad)
  - Elias Stehle (https://github.com/elstehle)

URL: https://github.com/rapidsai/cudf/pull/11966
---
 cpp/examples/README.md                 |  3 ++-
 cpp/examples/basic/README.md           |  2 +-
 cpp/examples/basic/src/process_csv.cpp | 19 +++++++++++++
 cpp/examples/strings/README.md         | 37 ++++++++++++++++++++++++++
 4 files changed, 59 insertions(+), 2 deletions(-)
 create mode 100644 cpp/examples/strings/README.md

diff --git a/cpp/examples/README.md b/cpp/examples/README.md
index 30b291d38f4..b2e8dd399d0 100644
--- a/cpp/examples/README.md
+++ b/cpp/examples/README.md
@@ -5,4 +5,5 @@ libcudf examples.
 
 Current examples:
 
-- Basic: example that demonstrates basic use case with libcudf and building a custom application with libcudf.
+- Basic: demonstrates a basic use case with libcudf and building a custom application with libcudf
+- Strings: demonstrates using libcudf for accessing and creating strings columns and for building custom kernels for strings
diff --git a/cpp/examples/basic/README.md b/cpp/examples/basic/README.md
index 75f16e54033..471dcf6694f 100644
--- a/cpp/examples/basic/README.md
+++ b/cpp/examples/basic/README.md
@@ -15,7 +15,7 @@ cmake -S . -B build/
 # Build
 cmake --build build/ --parallel $PARALLEL_LEVEL
 # Execute
-build/libcudf_example
+build/basic_example
 ```
 
 If your machine does not come with a pre-built libcudf binary, expect the
diff --git a/cpp/examples/basic/src/process_csv.cpp b/cpp/examples/basic/src/process_csv.cpp
index 5a3914da453..edd14d9ee5f 100644
--- a/cpp/examples/basic/src/process_csv.cpp
+++ b/cpp/examples/basic/src/process_csv.cpp
@@ -19,6 +19,10 @@
 #include <cudf/io/csv.hpp>
 #include <cudf/table/table.hpp>
 
+#include <rmm/mr/device/cuda_memory_resource.hpp>
+#include <rmm/mr/device/device_memory_resource.hpp>
+#include <rmm/mr/device/pool_memory_resource.hpp>
+
 #include <memory>
 #include <string>
 #include <utility>
@@ -72,6 +76,21 @@ std::unique_ptr<cudf::table> average_closing_price(cudf::table_view stock_info_t
 
 int main(int argc, char** argv)
 {
+  // Construct a CUDA memory resource using RAPIDS Memory Manager (RMM)
+  // This is the default memory resource for libcudf for allocating device memory.
+  rmm::mr::cuda_memory_resource cuda_mr{};
+  // Construct a memory pool using the CUDA memory resource
+  // Using a memory pool for device memory allocations is important for good performance in libcudf.
+  // The pool defaults to allocating half of the available GPU memory.
+  rmm::mr::pool_memory_resource mr{&cuda_mr};
+
+  // Set the pool resource to be used by default for all device memory allocations
+  // Note: It is the user's responsibility to ensure the `mr` object stays alive for the duration of
+  // it being set as the default
+  // Also, call this before the first libcudf API call to ensure all data is allocated by the same
+  // memory resource.
+  rmm::mr::set_current_device_resource(&mr);
+
   // Read data
   auto stock_table_with_metadata = read_csv("4stock_5day.csv");
 
diff --git a/cpp/examples/strings/README.md b/cpp/examples/strings/README.md
new file mode 100644
index 00000000000..241aa064bcc
--- /dev/null
+++ b/cpp/examples/strings/README.md
@@ -0,0 +1,37 @@
+# libcudf C++ examples using strings columns
+
+This C++ example demonstrates using libcudf APIs to access and create
+strings columns.
+
+The example source code loads a csv file and produces a redacted strings
+column from the names column using the values from the visibilities column.
+
+Four examples are included:
+1. Using libcudf APIs to build the output
+2. Using a simple custom kernel with dynamic memory
+3. Using a custom kernel with pre-allocated device memory
+4. Using a two-pass approach to improve performance
+
+These examples are described in more detail in
+https://developer.nvidia.com/blog/mastering-string-transformations-in-rapids-libcudf/
+
+## Compile and execute
+
+```bash
+# Configure project
+cmake -S . -B build/
+# Build
+cmake --build build/ --parallel $PARALLEL_LEVEL
+# Execute
+build/libcudf_apis names.csv
+# --OR--
+build/custom_with_malloc names.csv
+# --OR--
+build/custom_prealloc names.csv
+# --OR--
+build/custom_optimized names.csv
+```
+
+If your machine does not come with a pre-built libcudf binary, expect the
+first build to take some time, as it would build libcudf on the host machine.
+It may be sped up by configuring the proper `PARALLEL_LEVEL` number.

From 2d89f43f6a14d3f818d62d1a3a817ed86365a403 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 25 Oct 2022 08:54:19 -0700
Subject: [PATCH 069/202] Add missing noexcepts to column_in_metadata methods
 (#11973)

These functions cannot throw exceptions.

Resolved #11399

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/11973
---
 cpp/include/cudf/io/types.hpp | 40 ++++++++++++++++++++---------------
 1 file changed, 23 insertions(+), 17 deletions(-)

diff --git a/cpp/include/cudf/io/types.hpp b/cpp/include/cudf/io/types.hpp
index 838151fbaf9..9c47ed9ea69 100644
--- a/cpp/include/cudf/io/types.hpp
+++ b/cpp/include/cudf/io/types.hpp
@@ -419,7 +419,7 @@ class column_in_metadata {
    * @param name Name of the column
    * @return this for chaining
    */
-  column_in_metadata& set_name(std::string const& name)
+  column_in_metadata& set_name(std::string const& name) noexcept
   {
     _name = name;
     return *this;
@@ -433,7 +433,7 @@ class column_in_metadata {
    * @param nullable Whether this column is nullable
    * @return this for chaining
    */
-  column_in_metadata& set_nullability(bool nullable)
+  column_in_metadata& set_nullability(bool nullable) noexcept
   {
     _nullable = nullable;
     return *this;
@@ -446,7 +446,7 @@ class column_in_metadata {
    *
    * @return this for chaining
    */
-  column_in_metadata& set_list_column_as_map()
+  column_in_metadata& set_list_column_as_map() noexcept
   {
     _list_column_is_map = true;
     return *this;
@@ -460,7 +460,7 @@ class column_in_metadata {
    * @param req True = use int96 physical type. False = use int64 physical type
    * @return this for chaining
    */
-  column_in_metadata& set_int96_timestamps(bool req)
+  column_in_metadata& set_int96_timestamps(bool req) noexcept
   {
     _use_int96_timestamp = req;
     return *this;
@@ -473,7 +473,7 @@ class column_in_metadata {
    * @param precision The integer precision to set for this decimal column
    * @return this for chaining
    */
-  column_in_metadata& set_decimal_precision(uint8_t precision)
+  column_in_metadata& set_decimal_precision(uint8_t precision) noexcept
   {
     _decimal_precision = precision;
     return *this;
@@ -485,7 +485,7 @@ class column_in_metadata {
    * @param field_id The parquet field id to set
    * @return this for chaining
    */
-  column_in_metadata& set_parquet_field_id(int32_t field_id)
+  column_in_metadata& set_parquet_field_id(int32_t field_id) noexcept
   {
     _parquet_field_id = field_id;
     return *this;
@@ -499,7 +499,7 @@ class column_in_metadata {
    * @param binary True = use binary data type. False = use string data type
    * @return this for chaining
    */
-  column_in_metadata& set_output_as_binary(bool binary)
+  column_in_metadata& set_output_as_binary(bool binary) noexcept
   {
     _output_as_binary = binary;
     return *this;
@@ -511,7 +511,7 @@ class column_in_metadata {
    * @param i Index of the child to get
    * @return this for chaining
    */
-  column_in_metadata& child(size_type i) { return children[i]; }
+  column_in_metadata& child(size_type i) noexcept { return children[i]; }
 
   /**
    * @brief Get const reference to a child of this column
@@ -519,21 +519,21 @@ class column_in_metadata {
    * @param i Index of the child to get
    * @return this for chaining
    */
-  [[nodiscard]] column_in_metadata const& child(size_type i) const { return children[i]; }
+  [[nodiscard]] column_in_metadata const& child(size_type i) const noexcept { return children[i]; }
 
   /**
    * @brief Get the name of this column
    *
    * @return The name of this column
    */
-  [[nodiscard]] std::string get_name() const { return _name; }
+  [[nodiscard]] std::string get_name() const noexcept { return _name; }
 
   /**
    * @brief Get whether nullability has been explicitly set for this column.
    *
    * @return Boolean indicating whether nullability has been explicitly set for this column
    */
-  [[nodiscard]] bool is_nullability_defined() const { return _nullable.has_value(); }
+  [[nodiscard]] bool is_nullability_defined() const noexcept { return _nullable.has_value(); }
 
   /**
    * @brief Gets the explicitly set nullability for this column.
@@ -549,7 +549,7 @@ class column_in_metadata {
    *
    * @return Boolean indicating whether this column is to be encoded as a map
    */
-  [[nodiscard]] bool is_map() const { return _list_column_is_map; }
+  [[nodiscard]] bool is_map() const noexcept { return _list_column_is_map; }
 
   /**
    * @brief Get whether to encode this timestamp column using deprecated int96 physical type
@@ -557,14 +557,17 @@ class column_in_metadata {
    * @return Boolean indicating whether to encode this timestamp column using deprecated int96
    *         physical type
    */
-  [[nodiscard]] bool is_enabled_int96_timestamps() const { return _use_int96_timestamp; }
+  [[nodiscard]] bool is_enabled_int96_timestamps() const noexcept { return _use_int96_timestamp; }
 
   /**
    * @brief Get whether precision has been set for this decimal column
    *
    * @return Boolean indicating whether precision has been set for this decimal column
    */
-  [[nodiscard]] bool is_decimal_precision_set() const { return _decimal_precision.has_value(); }
+  [[nodiscard]] bool is_decimal_precision_set() const noexcept
+  {
+    return _decimal_precision.has_value();
+  }
 
   /**
    * @brief Get the decimal precision that was set for this column.
@@ -580,7 +583,10 @@ class column_in_metadata {
    *
    * @return Boolean indicating whether parquet field id has been set for this column
    */
-  [[nodiscard]] bool is_parquet_field_id_set() const { return _parquet_field_id.has_value(); }
+  [[nodiscard]] bool is_parquet_field_id_set() const noexcept
+  {
+    return _parquet_field_id.has_value();
+  }
 
   /**
    * @brief Get the parquet field id that was set for this column.
@@ -596,14 +602,14 @@ class column_in_metadata {
    *
    * @return The number of children of this column
    */
-  [[nodiscard]] size_type num_children() const { return children.size(); }
+  [[nodiscard]] size_type num_children() const noexcept { return children.size(); }
 
   /**
    * @brief Get whether to encode this column as binary or string data
    *
    * @return Boolean indicating whether to encode this column as binary data
    */
-  [[nodiscard]] bool is_enabled_output_as_binary() const { return _output_as_binary; }
+  [[nodiscard]] bool is_enabled_output_as_binary() const noexcept { return _output_as_binary; }
 };
 
 /**

From 285cb9e43d3d8c49d3a0a6020c16b38a4bf4f381 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 25 Oct 2022 09:11:50 -0700
Subject: [PATCH 070/202] Replace default_stream_value with get_default_stream
 in docs. (#11985)

Brings the docs in line with the new way of getting the default stream in libcudf.

Authors:
   - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
   - Yunsong Wang (https://github.com/PointKernel)
   - Tobias Ribizel (https://github.com/upsj)
   - Nghia Truong (https://github.com/ttnghia)
---
 cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md | 4 ++--
 cpp/doxygen/developer_guide/DOCUMENTATION.md   | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md
index 606dabcb937..e49e270625b 100644
--- a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md
+++ b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md
@@ -416,7 +416,7 @@ internal API in the `detail` namespace. The internal `detail` API has the same p
 public API, plus a `rmm::cuda_stream_view` parameter at the end with no default value. If the
 detail API also accepts a memory resource parameter, the stream parameter should be ideally placed
 just *before* the memory resource. The public API will call the detail API and provide
-`cudf::default_stream_value`. The implementation should be wholly contained in the `detail` API
+`cudf::get_default_stream()`. The implementation should be wholly contained in the `detail` API
 definition and use only asynchronous versions of CUDA APIs with the stream parameter.
 
 In order to make the `detail` API callable from other libcudf functions, it should be exposed in a
@@ -447,7 +447,7 @@ namespace detail{
 
 void external_function(...){
     CUDF_FUNC_RANGE(); // Generates an NVTX range for the lifetime of this function.
-    detail::external_function(..., cudf::default_stream_value);
+    detail::external_function(..., cudf::get_default_stream());
 }
 ```
 
diff --git a/cpp/doxygen/developer_guide/DOCUMENTATION.md b/cpp/doxygen/developer_guide/DOCUMENTATION.md
index 07ef1bdc530..b86f7db82b0 100644
--- a/cpp/doxygen/developer_guide/DOCUMENTATION.md
+++ b/cpp/doxygen/developer_guide/DOCUMENTATION.md
@@ -224,7 +224,7 @@ Also, \@copydoc is useful when documenting a `detail` function that differs only
      */
     std::vector<size_type> segmented_count_set_bits(bitmask_type const* bitmask,
                                                     std::vector<size_type> const& indices,
-                                                    rmm::cuda_stream_view stream = cudf::default_stream_value);
+                                                    rmm::cuda_stream_view stream = cudf::get_default_stream());
 
 Note, you must specify the whole signature of the function, including optional parameters, so that doxygen will be able to locate it.
 

From a37f27b4e4be397d875201ac61bf0c11590b1617 Mon Sep 17 00:00:00 2001
From: Robert Maynard <rmaynard@nvidia.com>
Date: Tue, 25 Oct 2022 14:21:20 -0400
Subject: [PATCH 071/202] Ensure better compiler cache results between cudf
 cal-ver branches (#11835)

By passing the CUDF_VERSION compile definition only to the single source that needs it, we can remove compiler cache misses when switching between branches with different cal-ver values.

Authors:
  - Robert Maynard (https://github.com/robertmaynard)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Karthikeyan (https://github.com/karthikeyann)

URL: https://github.com/rapidsai/cudf/pull/11835
---
 cpp/CMakeLists.txt | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index bfabbbc625d..179ee298be3 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -617,9 +617,15 @@ target_compile_definitions(
 # Disable Jitify log printing. See https://github.com/NVIDIA/jitify/issues/79
 target_compile_definitions(cudf PRIVATE "JITIFY_PRINT_LOG=0")
 
-# Instruct jitify to use the kernel JIT cache
 if(JITIFY_USE_CACHE)
-  target_compile_definitions(cudf PUBLIC JITIFY_USE_CACHE "CUDF_VERSION=${PROJECT_VERSION}")
+  # Instruct src/jit/cache what version of cudf we are building so it can compute a cal-ver cache
+  # directory. We isolate this definition to the single source so it doesn't affect compiler
+  # caching for all of libcudf
+  set_property(
+    SOURCE src/jit/cache.cpp
+    APPEND
+    PROPERTY COMPILE_DEFINITIONS "JITIFY_USE_CACHE" "CUDF_VERSION=${PROJECT_VERSION}"
+  )
 endif()
 
 # Per-thread default stream

From ffd130a2fb7f85d4304cb344835694449a6e15ad Mon Sep 17 00:00:00 2001
From: Ray Douglass <3107146+raydouglass@users.noreply.github.com>
Date: Tue, 25 Oct 2022 15:08:05 -0400
Subject: [PATCH 072/202] Remove stale labeler (#11995)

This PR removes the stale issue labeler workflow

Authors:
  - Ray Douglass (https://github.com/raydouglass)

Approvers:
  - AJ Schmidt (https://github.com/ajschmidt8)

URL: https://github.com/rapidsai/cudf/pull/11995
---
 .github/workflows/stale.yaml | 57 ------------------------------------
 1 file changed, 57 deletions(-)
 delete mode 100644 .github/workflows/stale.yaml

diff --git a/.github/workflows/stale.yaml b/.github/workflows/stale.yaml
deleted file mode 100644
index 741e159fbd8..00000000000
--- a/.github/workflows/stale.yaml
+++ /dev/null
@@ -1,57 +0,0 @@
-name: Mark inactive issues and pull requests
-
-on:
-  schedule:
-    - cron: "0 * * * *"
-
-jobs:
-  mark-inactive-30d:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Mark 30 day inactive issues
-        uses: actions/stale@v3
-        with:
-          repo-token: ${{ secrets.GITHUB_TOKEN }}
-          stale-issue-message: >
-            This issue has been labeled `inactive-30d` due to no recent activity in the past 30 days.
-            Please close this issue if no further response or action is needed.
-            Otherwise, please respond with a comment indicating any updates or changes to the original issue and/or confirm this issue still needs to be addressed.
-            This issue will be labeled `inactive-90d` if there is no activity in the next 60 days.
-          stale-issue-label: "inactive-30d"
-          exempt-issue-labels: "0 - Blocked,0 - Backlog,good first issue"
-          days-before-issue-stale: 30
-          days-before-issue-close: -1
-          stale-pr-message: >
-            This PR has been labeled `inactive-30d` due to no recent activity in the past 30 days.
-            Please close this PR if it is no longer required.
-            Otherwise, please respond with a comment indicating any updates.
-            This PR will be labeled `inactive-90d` if there is no activity in the next 60 days.
-          stale-pr-label: "inactive-30d"
-          exempt-pr-labels: "0 - Blocked,0 - Backlog,good first issue"
-          days-before-pr-stale: 30
-          days-before-pr-close: -1
-          operations-per-run: 50
-  mark-inactive-90d:
-    runs-on: ubuntu-latest
-    steps:
-      - name: Mark 90 day inactive issues
-        uses: actions/stale@v3
-        with:
-          repo-token: ${{ secrets.GITHUB_TOKEN }}
-          stale-issue-message: >
-            This issue has been labeled `inactive-90d` due to no recent activity in the past 90 days.
-            Please close this issue if no further response or action is needed.
-            Otherwise, please respond with a comment indicating any updates or changes to the original issue and/or confirm this issue still needs to be addressed.
-          stale-issue-label: "inactive-90d"
-          exempt-issue-labels: "0 - Blocked,0 - Backlog,good first issue"
-          days-before-issue-stale: 90
-          days-before-issue-close: -1
-          stale-pr-message: >
-            This PR has been labeled `inactive-90d` due to no recent activity in the past 90 days.
-            Please close this PR if it is no longer required.
-            Otherwise, please respond with a comment indicating any updates.
-          stale-pr-label: "inactive-90d"
-          exempt-pr-labels: "0 - Blocked,0 - Backlog,good first issue"
-          days-before-pr-stale: 90
-          days-before-pr-close: -1
-          operations-per-run: 50

From 6a5c77b27120f8b2e390be9adbbc8b600e7e36a8 Mon Sep 17 00:00:00 2001
From: Robert Maynard <rmaynard@nvidia.com>
Date: Tue, 25 Oct 2022 15:35:34 -0400
Subject: [PATCH 073/202] Minor cleanup of root CMakeLists.txt for better
 organization (#11988)

Clean up some minor issues in the root cudf CMakeLists.txt. Make the search for `CUDA_SANITIZER` occur only when we are building tests, as it doesn't need to be done for production builds.

Move the gdb pretty print script logic to a separate region to better document what it is for.

Authors:
  - Robert Maynard (https://github.com/robertmaynard)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/11988
---
 cpp/CMakeLists.txt | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 179ee298be3..0ffcc2e3e0b 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -124,12 +124,6 @@ rapids_find_package(
 )
 include(cmake/Modules/ConfigureCUDA.cmake) # set other CUDA compilation flags
 
-# ctest cuda memcheck
-find_program(CUDA_SANITIZER compute-sanitizer)
-set(MEMORYCHECK_COMMAND ${CUDA_SANITIZER})
-set(MEMORYCHECK_TYPE CudaSanitizer)
-set(CUDA_SANITIZER_COMMAND_OPTIONS "--tool memcheck")
-
 # ##################################################################################################
 # * dependencies ----------------------------------------------------------------------------------
 
@@ -738,6 +732,13 @@ add_library(cudf::cudftestutil ALIAS cudftestutil)
 if(CUDF_BUILD_TESTS)
   # include CTest module -- automatically calls enable_testing()
   include(CTest)
+
+  # ctest cuda memcheck
+  find_program(CUDA_SANITIZER compute-sanitizer)
+  set(MEMORYCHECK_COMMAND ${CUDA_SANITIZER})
+  set(MEMORYCHECK_TYPE CudaSanitizer)
+  set(CUDA_SANITIZER_COMMAND_OPTIONS "--tool memcheck")
+
   # Always print verbose output when tests fail if run using `make test`.
   list(APPEND CMAKE_CTEST_ARGUMENTS "--output-on-failure")
   add_subdirectory(tests)
@@ -762,11 +763,6 @@ if(CUDF_BUILD_BENCHMARKS)
   add_subdirectory(benchmarks)
 endif()
 
-# build pretty-printer load script
-if(Thrust_SOURCE_DIR AND rmm_SOURCE_DIR)
-  configure_file(scripts/load-pretty-printers.in load-pretty-printers @ONLY)
-endif()
-
 # ##################################################################################################
 # * install targets -------------------------------------------------------------------------------
 rapids_cmake_install_lib_dir(lib_dir)
@@ -930,3 +926,11 @@ add_custom_target(
   DEPENDS CUDF_DOXYGEN
   COMMENT "Custom command for building cudf doxygen docs."
 )
+
+# ##################################################################################################
+# * make gdb helper scripts ------------------------------------------------------------------------
+
+# build pretty-printer load script
+if(Thrust_SOURCE_DIR AND rmm_SOURCE_DIR)
+  configure_file(scripts/load-pretty-printers.in load-pretty-printers @ONLY)
+endif()

From 5bfc9a46528e23b5461cb41f4bd160e08f61de70 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 25 Oct 2022 16:25:11 -0700
Subject: [PATCH 074/202] Move protobuf compilation to CMake (#11986)

We currently compile a proto file into a Python file inside setup.py by overriding a setuptools (scikit-build) stage (`build_ext`). However, depending on exactly how the package is built (specifically, when building wheels), that stage may occasionally be bypassed. Putting this logic into CMake guarantees that it is always run.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Paul Taylor (https://github.com/trxcllnt)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Lawrence Mitchell (https://github.com/wence-)

URL: https://github.com/rapidsai/cudf/pull/11986
---
 python/cudf/CMakeLists.txt                    |  4 ++
 .../cudf/cmake/Modules/ProtobufHelpers.cmake  | 51 +++++++++++++++++++
 .../metadata/orc_column_statistics.proto      |  2 +
 python/cudf/setup.py                          | 49 +-----------------
 4 files changed, 58 insertions(+), 48 deletions(-)
 create mode 100644 python/cudf/cmake/Modules/ProtobufHelpers.cmake

diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt
index 1cea23669e9..0781a38e6ad 100644
--- a/python/cudf/CMakeLists.txt
+++ b/python/cudf/CMakeLists.txt
@@ -63,3 +63,7 @@ include(rapids-cython)
 rapids_cython_init()
 
 add_subdirectory(cudf/_lib)
+
+include(cmake/Modules/ProtobufHelpers.cmake)
+
+codegen_protoc(cudf/utils/metadata/orc_column_statistics.proto)
diff --git a/python/cudf/cmake/Modules/ProtobufHelpers.cmake b/python/cudf/cmake/Modules/ProtobufHelpers.cmake
new file mode 100644
index 00000000000..e3a0edf978e
--- /dev/null
+++ b/python/cudf/cmake/Modules/ProtobufHelpers.cmake
@@ -0,0 +1,51 @@
+# =============================================================================
+# Copyright (c) 2022, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+# =============================================================================
+include_guard(GLOBAL)
+
+# Compile .proto files (paths relative to the current source dir) to *_pb2.py at configure time.
+function(codegen_protoc)
+  # Use the protoc given by the PROTOC environment variable, else find one (fatal if missing).
+  if(DEFINED ENV{PROTOC})
+    set(protoc_COMMAND $ENV{PROTOC})
+  else()
+    find_program(protoc_COMMAND protoc REQUIRED)
+  endif()
+
+  foreach(_proto_path IN LISTS ARGV)
+    string(REPLACE "\.proto" "_pb2\.py" pb2_py_path "${_proto_path}")
+    set(pb2_py_path "${CMAKE_CURRENT_SOURCE_DIR}/${pb2_py_path}")
+    # Note: If we ever need to process larger numbers of protobuf files we should consider switching
+    # to protobuf_generate_python from the FindProtobuf module.
+    execute_process(
+      COMMAND ${protoc_COMMAND} --python_out=. "${_proto_path}"
+      WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMAND_ERROR_IS_FATAL ANY
+    )
+    # Wrap the generated module in `flake8: noqa` and `fmt: off`/`fmt: on` so linters skip it.
+    file(READ "${pb2_py_path}" pb2_py)
+    file(
+      WRITE "${pb2_py_path}"
+      [=[
+# flake8: noqa
+# fmt: off
+]=]
+    )
+    file(APPEND "${pb2_py_path}" "${pb2_py}")
+    file(
+      APPEND "${pb2_py_path}"
+      [=[
+# fmt: on
+]=]
+    )
+  endforeach()
+endfunction()
diff --git a/python/cudf/cudf/utils/metadata/orc_column_statistics.proto b/python/cudf/cudf/utils/metadata/orc_column_statistics.proto
index 9dbaa713c03..1bc0fa6f6bd 100644
--- a/python/cudf/cudf/utils/metadata/orc_column_statistics.proto
+++ b/python/cudf/cudf/utils/metadata/orc_column_statistics.proto
@@ -1,3 +1,5 @@
+syntax = "proto2";
+
 message IntegerStatistics  {
   optional sint64 minimum = 1;
   optional sint64 maximum = 2;
diff --git a/python/cudf/setup.py b/python/cudf/setup.py
index 3ebb66cb0ad..9f22f87e240 100644
--- a/python/cudf/setup.py
+++ b/python/cudf/setup.py
@@ -3,14 +3,10 @@
 import os
 import re
 import shutil
-import subprocess
-import sys
-from distutils.spawn import find_executable
 
 import versioneer
 from setuptools import find_packages
 from skbuild import setup
-from skbuild.command.build_ext import build_ext
 
 install_requires = [
     "cachetools",
@@ -84,49 +80,6 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""):
 )
 
 
-class build_ext_and_proto(build_ext):
-    def run(self):
-        # Get protoc
-        protoc = None
-        if "PROTOC" in os.environ and os.path.exists(os.environ["PROTOC"]):
-            protoc = os.environ["PROTOC"]
-        else:
-            protoc = find_executable("protoc")
-        if protoc is None:
-            sys.stderr.write("protoc not found")
-            sys.exit(1)
-
-        # Build .proto file
-        for source in ["cudf/utils/metadata/orc_column_statistics.proto"]:
-            output = source.replace(".proto", "_pb2.py")
-
-            if not os.path.exists(output) or (
-                os.path.getmtime(source) > os.path.getmtime(output)
-            ):
-                with open(output, "a") as src:
-                    src.write("# flake8: noqa" + os.linesep)
-                    src.write("# fmt: off" + os.linesep)
-                subprocess.check_call([protoc, "--python_out=.", source])
-                with open(output, "r+") as src:
-                    new_src_content = (
-                        "# flake8: noqa"
-                        + os.linesep
-                        + "# fmt: off"
-                        + os.linesep
-                        + src.read()
-                        + "# fmt: on"
-                        + os.linesep
-                    )
-                    src.seek(0)
-                    src.write(new_src_content)
-
-        # Run original Cython build_ext command
-        super().run()
-
-
-cmdclass = versioneer.get_cmdclass()
-cmdclass["build_ext"] = build_ext_and_proto
-
 setup(
     name="cudf",
     version=versioneer.get_version(),
@@ -147,7 +100,7 @@ def run(self):
     package_data={
         key: ["*.pxd"] for key in find_packages(include=["cudf._lib*"])
     },
-    cmdclass=cmdclass,
+    cmdclass=versioneer.get_cmdclass(),
     install_requires=install_requires,
     extras_require=extras_require,
     zip_safe=False,

From 6b9c0268f589c9af07796039aea29d9a0cc3361a Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 25 Oct 2022 16:45:50 -0700
Subject: [PATCH 075/202] Use rapids-cmake for google benchmark. (#11997)

This PR centralizes handling of google benchmark during the build process by requesting it from rapids-cmake.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/11997
---
 cpp/CMakeLists.txt | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 0ffcc2e3e0b..ea60f467ce9 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -749,13 +749,8 @@ endif()
 
 if(CUDF_BUILD_BENCHMARKS)
   # Find or install GoogleBench
-  rapids_cpm_find(
-    benchmark 1.5.2
-    GIT_REPOSITORY https://github.com/google/benchmark.git
-    GIT_TAG v1.5.2
-    GIT_SHALLOW TRUE
-    OPTIONS "BENCHMARK_ENABLE_TESTING OFF" "BENCHMARK_ENABLE_INSTALL OFF"
-  )
+  include(${rapids-cmake-dir}/cpm/gbench.cmake)
+  rapids_cpm_gbench()
 
   # Find or install NVBench
   include(${rapids-cmake-dir}/cpm/nvbench.cmake)

From b7d011528af766294e84e4dafd483c650fb5901b Mon Sep 17 00:00:00 2001
From: Robert Maynard <rmaynard@nvidia.com>
Date: Tue, 25 Oct 2022 19:56:35 -0400
Subject: [PATCH 076/202] Switch to DISABLE_DEPRECATION_WARNINGS to match other
 RAPIDS projects (#11989)

Use the term `DISABLE_DEPRECATION_WARNINGS` so that we match other RAPIDS projects (see https://github.com/rapidsai/cuml/pull/4946); the plural form also makes more sense in general.

Authors:
  - Robert Maynard (https://github.com/robertmaynard)
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/11989
---
 build.sh                              | 6 +++---
 cpp/CMakeLists.txt                    | 4 ++--
 cpp/cmake/Modules/ConfigureCUDA.cmake | 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/build.sh b/build.sh
index bda3d83798a..e62da9791da 100755
--- a/build.sh
+++ b/build.sh
@@ -64,7 +64,7 @@ BUILD_BENCHMARKS=OFF
 BUILD_ALL_GPU_ARCH=0
 BUILD_NVTX=ON
 BUILD_TESTS=OFF
-BUILD_DISABLE_DEPRECATION_WARNING=ON
+BUILD_DISABLE_DEPRECATION_WARNINGS=ON
 BUILD_PER_THREAD_DEFAULT_STREAM=OFF
 BUILD_REPORT_METRICS=OFF
 BUILD_REPORT_INCL_CACHE_STATS=OFF
@@ -216,7 +216,7 @@ if hasArg --opensource_nvcomp; then
     USE_PROPRIETARY_NVCOMP="OFF"
 fi
 if hasArg --show_depr_warn; then
-    BUILD_DISABLE_DEPRECATION_WARNING=OFF
+    BUILD_DISABLE_DEPRECATION_WARNINGS=OFF
 fi
 if hasArg --ptds; then
     BUILD_PER_THREAD_DEFAULT_STREAM=ON
@@ -285,7 +285,7 @@ if buildAll || hasArg libcudf; then
           -DCUDF_USE_PROPRIETARY_NVCOMP=${USE_PROPRIETARY_NVCOMP} \
           -DBUILD_TESTS=${BUILD_TESTS} \
           -DBUILD_BENCHMARKS=${BUILD_BENCHMARKS} \
-          -DDISABLE_DEPRECATION_WARNING=${BUILD_DISABLE_DEPRECATION_WARNING} \
+          -DDISABLE_DEPRECATION_WARNINGS=${BUILD_DISABLE_DEPRECATION_WARNINGS} \
           -DCUDF_USE_PER_THREAD_DEFAULT_STREAM=${BUILD_PER_THREAD_DEFAULT_STREAM} \
           -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
           ${EXTRA_CMAKE_ARGS}
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index ea60f467ce9..289c432dea5 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -60,7 +60,7 @@ option(
          stream to external libraries."
   OFF
 )
-option(DISABLE_DEPRECATION_WARNING "Disable warnings generated from deprecated declarations." OFF)
+option(DISABLE_DEPRECATION_WARNINGS "Disable warnings generated from deprecated declarations." OFF)
 # Option to enable line info in CUDA device compilation to allow introspection when profiling /
 # memchecking
 option(CUDA_ENABLE_LINEINFO
@@ -79,7 +79,7 @@ message(VERBOSE "CUDF: Build and enable S3 filesystem support for Arrow: ${CUDF_
 message(VERBOSE "CUDF: Build with per-thread default stream: ${CUDF_USE_PER_THREAD_DEFAULT_STREAM}")
 message(
   VERBOSE
-  "CUDF: Disable warnings generated from deprecated declarations: ${DISABLE_DEPRECATION_WARNING}"
+  "CUDF: Disable warnings generated from deprecated declarations: ${DISABLE_DEPRECATION_WARNINGS}"
 )
 message(
   VERBOSE
diff --git a/cpp/cmake/Modules/ConfigureCUDA.cmake b/cpp/cmake/Modules/ConfigureCUDA.cmake
index 198435e739d..9ee4d3e411c 100644
--- a/cpp/cmake/Modules/ConfigureCUDA.cmake
+++ b/cpp/cmake/Modules/ConfigureCUDA.cmake
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2018-2021, NVIDIA CORPORATION.
+# Copyright (c) 2018-2022, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -22,7 +22,7 @@ list(APPEND CUDF_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr)
 list(APPEND CUDF_CUDA_FLAGS -Werror=cross-execution-space-call)
 list(APPEND CUDF_CUDA_FLAGS -Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations)
 
-if(DISABLE_DEPRECATION_WARNING)
+if(DISABLE_DEPRECATION_WARNINGS)
   list(APPEND CUDF_CXX_FLAGS -Wno-deprecated-declarations)
   list(APPEND CUDF_CUDA_FLAGS -Xcompiler=-Wno-deprecated-declarations)
 endif()

From b89c0e244094cc7bc99a761c28288ba50a96d004 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Wed, 26 Oct 2022 08:57:57 -0500
Subject: [PATCH 077/202] Add inplace arithmetic operators to `MaskedType`
 (#11987)

Closes https://github.com/rapidsai/cudf/issues/11887

After merging, we will support syntax like `a += b` inside UDFs used through `DataFrame.apply` and `Series.apply`.

Authors:
  - https://github.com/brandon-b-miller

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/11987
---
 python/cudf/cudf/core/udf/_ops.py             |  7 +++++++
 python/cudf/cudf/tests/test_udf_masked_ops.py | 16 ++++++++++++----
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/python/cudf/cudf/core/udf/_ops.py b/python/cudf/cudf/core/udf/_ops.py
index 559a5bfad4f..6b0640b09ed 100644
--- a/python/cudf/cudf/core/udf/_ops.py
+++ b/python/cudf/cudf/core/udf/_ops.py
@@ -11,6 +11,13 @@
     operator.floordiv,
     operator.mod,
     operator.pow,
+    operator.iadd,
+    operator.isub,
+    operator.imul,
+    operator.itruediv,
+    operator.ifloordiv,
+    operator.ipow,
+    operator.imod,
 ]
 
 bitwise_ops = [operator.and_, operator.or_, operator.xor]
diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py
index 20245bd2a20..f1d110ba168 100644
--- a/python/cudf/cudf/tests/test_udf_masked_ops.py
+++ b/python/cudf/cudf/tests/test_udf_masked_ops.py
@@ -197,6 +197,10 @@ def func(row):
         operator.pow,
         operator.truediv,
         operator.floordiv,
+        operator.imod,
+        operator.ipow,
+        operator.itruediv,
+        operator.ifloordiv,
     }:
         # The following tests cases yield undefined behavior:
         # - truediv(x, False) because its dividing by zero
@@ -219,7 +223,7 @@ def func(row):
     # Just a single column -> result will be all NA
     gdf = cudf.DataFrame({"data": data})
 
-    if constant == 1 and op is operator.pow:
+    if constant == 1 and op in {operator.pow, operator.ipow}:
         # The following tests cases yield differing results from pandas:
         # - 1**NA
         # - True**NA
@@ -237,7 +241,7 @@ def func(row):
 
     gdf = cudf.DataFrame({"data": data})
 
-    if 1 in gdf["data"] and op is operator.pow:
+    if 1 in gdf["data"] and op in {operator.pow, operator.ipow}:
         # In pandas, 1**NA == 1.
         pytest.skip()
     run_masked_udf_test(func, gdf, check_dtype=False)
@@ -483,7 +487,7 @@ def func(x):
 
     # Just a single column -> result will be all NA
     data = cudf.Series([1, 2, cudf.NA])
-    if constant is cudf.NA and op is operator.pow:
+    if constant is cudf.NA and op in {operator.pow, operator.ipow}:
         # in pandas, 1**NA == 1. In cudf, 1**NA == 1.
         with pytest.xfail():
             run_masked_udf_series(func, data, check_dtype=False)
@@ -499,7 +503,11 @@ def func(x):
 
     # Just a single column -> result will be all NA
     data = cudf.Series([1, 2, cudf.NA])
-    if constant is not cudf.NA and constant == 1 and op is operator.pow:
+    if (
+        constant is not cudf.NA
+        and constant == 1
+        and op in {operator.pow, operator.ipow}
+    ):
         # in pandas, 1**NA == 1. In cudf, 1**NA == 1.
         with pytest.xfail():
             run_masked_udf_series(func, data, check_dtype=False)

From c146d21f38aa119376ac4837af159bcb2ca62f90 Mon Sep 17 00:00:00 2001
From: Vukasin Milovanovic <vukasin.milovanovic.87@gmail.com>
Date: Wed, 26 Oct 2022 09:35:12 -0700
Subject: [PATCH 078/202] Revert "Replace most of preprocessor usage in nvcomp
 adapter with `constexpr`" (#11999)

Reverts rapidsai/cudf#11980

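The PR was made under the assumption that `if constexpr` branches can contain invalid code if the branch is not taken. However, this only holds inside templates, where the discarded branch is not instantiated; in non-template code both branches must still compile.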

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/11999
---
 cpp/src/io/comp/nvcomp_adapter.cpp | 258 +++++++++++++++--------------
 1 file changed, 135 insertions(+), 123 deletions(-)

diff --git a/cpp/src/io/comp/nvcomp_adapter.cpp b/cpp/src/io/comp/nvcomp_adapter.cpp
index f66f2ec0c0c..fd794b2e66c 100644
--- a/cpp/src/io/comp/nvcomp_adapter.cpp
+++ b/cpp/src/io/comp/nvcomp_adapter.cpp
@@ -31,30 +31,46 @@
 #include NVCOMP_ZSTD_HEADER
 #endif
 
-constexpr bool NVCOMP_HAS_ZSTD_DECOMP = NVCOMP_MAJOR_VERSION > 2 or
-                                        (NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION >= 3);
+#if NVCOMP_MAJOR_VERSION > 2 or (NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION >= 3)
+#define NVCOMP_HAS_ZSTD_DECOMP 1
+#else
+#define NVCOMP_HAS_ZSTD_DECOMP 0
+#endif
 
-constexpr bool NVCOMP_HAS_ZSTD_COMP = NVCOMP_MAJOR_VERSION > 2 or
-                                      (NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION >= 4);
+#if NVCOMP_MAJOR_VERSION > 2 or (NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION >= 4)
+#define NVCOMP_HAS_ZSTD_COMP 1
+#else
+#define NVCOMP_HAS_ZSTD_COMP 0
+#endif
 
-constexpr bool NVCOMP_HAS_DEFLATE = NVCOMP_MAJOR_VERSION > 2 or
-                                    (NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION >= 3);
+#if NVCOMP_MAJOR_VERSION > 2 or (NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION >= 3)
+#define NVCOMP_HAS_DEFLATE 1
+#else
+#define NVCOMP_HAS_DEFLATE 0
+#endif
 
-constexpr bool NVCOMP_HAS_TEMPSIZE_EX = NVCOMP_MAJOR_VERSION > 2 or
-                                        (NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION > 3) or
-                                        (NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION == 3 and
-                                         NVCOMP_PATCH_VERSION >= 1);
+#if NVCOMP_MAJOR_VERSION > 2 or (NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION > 3) or \
+  (NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION == 3 and NVCOMP_PATCH_VERSION >= 1)
+#define NVCOMP_HAS_TEMPSIZE_EX 1
+#else
+#define NVCOMP_HAS_TEMPSIZE_EX 0
+#endif
 
 // ZSTD is stable for nvcomp 2.3.2 or newer
-constexpr bool NVCOMP_ZSTD_IS_STABLE = NVCOMP_MAJOR_VERSION > 2 or
-                                       (NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION > 3) or
-                                       (NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION == 3 and
-                                        NVCOMP_PATCH_VERSION >= 2);
+#if NVCOMP_MAJOR_VERSION > 2 or (NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION > 3) or \
+  (NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION == 3 and NVCOMP_PATCH_VERSION >= 2)
+#define NVCOMP_ZSTD_IS_STABLE 1
+#else
+#define NVCOMP_ZSTD_IS_STABLE 0
+#endif
 
 // Issue https://github.com/NVIDIA/spark-rapids/issues/6614 impacts nvCOMP 2.4.0 ZSTD decompression
 // on compute 6.x
-constexpr bool NVCOMP_ZSTD_IS_DISABLED_ON_PASCAL =
-  NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION == 4 and NVCOMP_PATCH_VERSION == 0;
+#if NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION == 4 and NVCOMP_PATCH_VERSION == 0
+#define NVCOMP_ZSTD_IS_DISABLED_ON_PASCAL 1
+#else
+#define NVCOMP_ZSTD_IS_DISABLED_ON_PASCAL 0
+#endif
 
 namespace cudf::io::nvcomp {
 
@@ -63,20 +79,20 @@ template <typename... Args>
 std::optional<nvcompStatus_t> batched_decompress_get_temp_size_ex(compression_type compression,
                                                                   Args&&... args)
 {
-  if constexpr (NVCOMP_HAS_TEMPSIZE_EX) {
-    switch (compression) {
-      case compression_type::SNAPPY:
-        return nvcompBatchedSnappyDecompressGetTempSizeEx(std::forward<Args>(args)...);
-      case compression_type::ZSTD:
-        if constexpr (NVCOMP_HAS_ZSTD_DECOMP) {
-          return nvcompBatchedZstdDecompressGetTempSizeEx(std::forward<Args>(args)...);
-        } else {
-          return std::nullopt;
-        }
-      case compression_type::DEFLATE: [[fallthrough]];
-      default: return std::nullopt;
-    }
+#if NVCOMP_HAS_TEMPSIZE_EX
+  switch (compression) {
+    case compression_type::SNAPPY:
+      return nvcompBatchedSnappyDecompressGetTempSizeEx(std::forward<Args>(args)...);
+    case compression_type::ZSTD:
+#if NVCOMP_HAS_ZSTD_DECOMP
+      return nvcompBatchedZstdDecompressGetTempSizeEx(std::forward<Args>(args)...);
+#else
+      return std::nullopt;
+#endif
+    case compression_type::DEFLATE: [[fallthrough]];
+    default: return std::nullopt;
   }
+#endif
   return std::nullopt;
 }
 
@@ -88,17 +104,17 @@ auto batched_decompress_get_temp_size(compression_type compression, Args&&... ar
     case compression_type::SNAPPY:
       return nvcompBatchedSnappyDecompressGetTempSize(std::forward<Args>(args)...);
     case compression_type::ZSTD:
-      if constexpr (NVCOMP_HAS_ZSTD_DECOMP) {
-        return nvcompBatchedZstdDecompressGetTempSize(std::forward<Args>(args)...);
-      } else {
-        CUDF_FAIL("Unsupported compression type");
-      }
+#if NVCOMP_HAS_ZSTD_DECOMP
+      return nvcompBatchedZstdDecompressGetTempSize(std::forward<Args>(args)...);
+#else
+      CUDF_FAIL("Unsupported compression type");
+#endif
     case compression_type::DEFLATE:
-      if constexpr (NVCOMP_HAS_DEFLATE) {
-        return nvcompBatchedDeflateDecompressGetTempSize(std::forward<Args>(args)...);
-      } else {
-        CUDF_FAIL("Unsupported compression type");
-      }
+#if NVCOMP_HAS_DEFLATE
+      return nvcompBatchedDeflateDecompressGetTempSize(std::forward<Args>(args)...);
+#else
+      CUDF_FAIL("Unsupported compression type");
+#endif
     default: CUDF_FAIL("Unsupported compression type");
   }
 }
@@ -111,18 +127,17 @@ auto batched_decompress_async(compression_type compression, Args&&... args)
     case compression_type::SNAPPY:
       return nvcompBatchedSnappyDecompressAsync(std::forward<Args>(args)...);
     case compression_type::ZSTD:
-      if constexpr (NVCOMP_HAS_ZSTD_DECOMP) {
-        return nvcompBatchedZstdDecompressAsync(std::forward<Args>(args)...);
-      } else {
-        CUDF_FAIL("Unsupported compression type");
-      }
-
+#if NVCOMP_HAS_ZSTD_DECOMP
+      return nvcompBatchedZstdDecompressAsync(std::forward<Args>(args)...);
+#else
+      CUDF_FAIL("Unsupported compression type");
+#endif
     case compression_type::DEFLATE:
-      if constexpr (NVCOMP_HAS_DEFLATE) {
-        return nvcompBatchedDeflateDecompressAsync(std::forward<Args>(args)...);
-      } else {
-        CUDF_FAIL("Unsupported compression type");
-      }
+#if NVCOMP_HAS_DEFLATE
+      return nvcompBatchedDeflateDecompressAsync(std::forward<Args>(args)...);
+#else
+      CUDF_FAIL("Unsupported compression type");
+#endif
     default: CUDF_FAIL("Unsupported compression type");
   }
 }
@@ -155,13 +170,13 @@ void check_is_zstd_enabled()
                "Zstandard compression is experimental, you can enable it through "
                "`LIBCUDF_NVCOMP_POLICY` environment variable.");
 
-  if constexpr (NVCOMP_ZSTD_IS_DISABLED_ON_PASCAL) {
-    int device;
-    int cc_major;
-    CUDF_CUDA_TRY(cudaGetDevice(&device));
-    CUDF_CUDA_TRY(cudaDeviceGetAttribute(&cc_major, cudaDevAttrComputeCapabilityMajor, device));
-    CUDF_EXPECTS(cc_major != 6, "Zstandard decompression is disabled on Pascal GPUs");
-  }
+#if NVCOMP_ZSTD_IS_DISABLED_ON_PASCAL
+  int device;
+  int cc_major;
+  CUDF_CUDA_TRY(cudaGetDevice(&device));
+  CUDF_CUDA_TRY(cudaDeviceGetAttribute(&cc_major, cudaDevAttrComputeCapabilityMajor, device));
+  CUDF_EXPECTS(cc_major != 6, "Zstandard decompression is disabled on Pascal GPUs");
+#endif
 }
 
 void batched_decompress(compression_type compression,
@@ -213,22 +228,21 @@ auto batched_compress_temp_size(compression_type compression,
         batch_size, max_uncompressed_chunk_bytes, nvcompBatchedSnappyDefaultOpts, &temp_size);
       break;
     case compression_type::DEFLATE:
-      if constexpr (NVCOMP_HAS_DEFLATE) {
-        nvcomp_status = nvcompBatchedDeflateCompressGetTempSize(
-          batch_size, max_uncompressed_chunk_bytes, nvcompBatchedDeflateDefaultOpts, &temp_size);
-        break;
-      } else {
-        CUDF_FAIL("Unsupported compression type");
-      }
+#if NVCOMP_HAS_DEFLATE
+      nvcomp_status = nvcompBatchedDeflateCompressGetTempSize(
+        batch_size, max_uncompressed_chunk_bytes, nvcompBatchedDeflateDefaultOpts, &temp_size);
+      break;
+#else
+      CUDF_FAIL("Unsupported compression type");
+#endif
     case compression_type::ZSTD:
-      if constexpr (NVCOMP_HAS_ZSTD_COMP) {
-        nvcomp_status = nvcompBatchedZstdCompressGetTempSize(
-          batch_size, max_uncompressed_chunk_bytes, nvcompBatchedZstdDefaultOpts, &temp_size);
-        break;
-      } else {
-        CUDF_FAIL("Unsupported compression type");
-      }
-
+#if NVCOMP_HAS_ZSTD_COMP
+      nvcomp_status = nvcompBatchedZstdCompressGetTempSize(
+        batch_size, max_uncompressed_chunk_bytes, nvcompBatchedZstdDefaultOpts, &temp_size);
+      break;
+#else
+      CUDF_FAIL("Unsupported compression type");
+#endif
     default: CUDF_FAIL("Unsupported compression type");
   }
 
@@ -252,21 +266,21 @@ size_t compress_max_output_chunk_size(compression_type compression,
         capped_uncomp_bytes, nvcompBatchedSnappyDefaultOpts, &max_comp_chunk_size);
       break;
     case compression_type::DEFLATE:
-      if constexpr (NVCOMP_HAS_DEFLATE) {
-        status = nvcompBatchedDeflateCompressGetMaxOutputChunkSize(
-          capped_uncomp_bytes, nvcompBatchedDeflateDefaultOpts, &max_comp_chunk_size);
-        break;
-      } else {
-        CUDF_FAIL("Unsupported compression type");
-      }
+#if NVCOMP_HAS_DEFLATE
+      status = nvcompBatchedDeflateCompressGetMaxOutputChunkSize(
+        capped_uncomp_bytes, nvcompBatchedDeflateDefaultOpts, &max_comp_chunk_size);
+      break;
+#else
+      CUDF_FAIL("Unsupported compression type");
+#endif
     case compression_type::ZSTD:
-      if constexpr (NVCOMP_HAS_ZSTD_COMP) {
-        status = nvcompBatchedZstdCompressGetMaxOutputChunkSize(
-          capped_uncomp_bytes, nvcompBatchedZstdDefaultOpts, &max_comp_chunk_size);
-        break;
-      } else {
-        CUDF_FAIL("Unsupported compression type");
-      }
+#if NVCOMP_HAS_ZSTD_COMP
+      status = nvcompBatchedZstdCompressGetMaxOutputChunkSize(
+        capped_uncomp_bytes, nvcompBatchedZstdDefaultOpts, &max_comp_chunk_size);
+      break;
+#else
+      CUDF_FAIL("Unsupported compression type");
+#endif
     default: CUDF_FAIL("Unsupported compression type");
   }
 
@@ -302,39 +316,37 @@ static void batched_compress_async(compression_type compression,
                                                        stream.value());
       break;
     case compression_type::DEFLATE:
-      if constexpr (NVCOMP_HAS_DEFLATE) {
-        nvcomp_status = nvcompBatchedDeflateCompressAsync(device_uncompressed_ptrs,
-                                                          device_uncompressed_bytes,
-                                                          max_uncompressed_chunk_bytes,
-                                                          batch_size,
-                                                          device_temp_ptr,
-                                                          temp_bytes,
-                                                          device_compressed_ptrs,
-                                                          device_compressed_bytes,
-                                                          nvcompBatchedDeflateDefaultOpts,
-                                                          stream.value());
-        break;
-      } else {
-        CUDF_FAIL("Unsupported compression type");
-      }
-
+#if NVCOMP_HAS_DEFLATE
+      nvcomp_status = nvcompBatchedDeflateCompressAsync(device_uncompressed_ptrs,
+                                                        device_uncompressed_bytes,
+                                                        max_uncompressed_chunk_bytes,
+                                                        batch_size,
+                                                        device_temp_ptr,
+                                                        temp_bytes,
+                                                        device_compressed_ptrs,
+                                                        device_compressed_bytes,
+                                                        nvcompBatchedDeflateDefaultOpts,
+                                                        stream.value());
+      break;
+#else
+      CUDF_FAIL("Unsupported compression type");
+#endif
     case compression_type::ZSTD:
-      if constexpr (NVCOMP_HAS_ZSTD_COMP) {
-        nvcomp_status = nvcompBatchedZstdCompressAsync(device_uncompressed_ptrs,
-                                                       device_uncompressed_bytes,
-                                                       max_uncompressed_chunk_bytes,
-                                                       batch_size,
-                                                       device_temp_ptr,
-                                                       temp_bytes,
-                                                       device_compressed_ptrs,
-                                                       device_compressed_bytes,
-                                                       nvcompBatchedZstdDefaultOpts,
-                                                       stream.value());
-        break;
-      } else {
-        CUDF_FAIL("Unsupported compression type");
-      }
-
+#if NVCOMP_HAS_ZSTD_COMP
+      nvcomp_status = nvcompBatchedZstdCompressAsync(device_uncompressed_ptrs,
+                                                     device_uncompressed_bytes,
+                                                     max_uncompressed_chunk_bytes,
+                                                     batch_size,
+                                                     device_temp_ptr,
+                                                     temp_bytes,
+                                                     device_compressed_ptrs,
+                                                     device_compressed_bytes,
+                                                     nvcompBatchedZstdDefaultOpts,
+                                                     stream.value());
+      break;
+#else
+      CUDF_FAIL("Unsupported compression type");
+#endif
     default: CUDF_FAIL("Unsupported compression type");
   }
   CUDF_EXPECTS(nvcomp_status == nvcompStatus_t::nvcompSuccess, "Error in compression");
@@ -418,11 +430,11 @@ std::optional<size_t> compress_max_allowed_chunk_size(compression_type compressi
     case compression_type::DEFLATE: return 64 * 1024;
     case compression_type::SNAPPY: return std::nullopt;
     case compression_type::ZSTD:
-      if constexpr (NVCOMP_HAS_ZSTD_COMP) {
-        return nvcompZstdCompressionMaxAllowedChunkSize;
-      } else {
-        CUDF_FAIL("Unsupported compression type");
-      }
+#if NVCOMP_HAS_ZSTD_COMP
+      return nvcompZstdCompressionMaxAllowedChunkSize;
+#else
+      CUDF_FAIL("Unsupported compression type");
+#endif
     default: return std::nullopt;
   }
 }

From fac35b48490ed659d805eb5a8e62016622ac3fea Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Wed, 26 Oct 2022 13:33:21 -0400
Subject: [PATCH 079/202] Fix some libcudf calls to cudf::detail::gather
 (#11963)

Fixes a couple of source files that were calling gather by type-dispatching directly to the internal `column_gatherer` functor instead of using the `cudf::detail::gather` function(s). This simplifies the code and improves maintainability. For example, extra code to resolve the null-mask is eliminated since the appropriate `cudf::detail::gather` call does this automatically.
No functionality has changed; this is just code cleanup.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Mark Harris (https://github.com/harrism)
  - Nghia Truong (https://github.com/ttnghia)
  - Karthikeyan (https://github.com/karthikeyann)

URL: https://github.com/rapidsai/cudf/pull/11963
---
 cpp/src/lists/copying/gather.cu      | 41 ++++++++--------------------
 cpp/src/partitioning/partitioning.cu | 17 ++++++------
 2 files changed, 19 insertions(+), 39 deletions(-)

diff --git a/cpp/src/lists/copying/gather.cu b/cpp/src/lists/copying/gather.cu
index ae9fab4dda2..eda46e05f18 100644
--- a/cpp/src/lists/copying/gather.cu
+++ b/cpp/src/lists/copying/gather.cu
@@ -100,36 +100,17 @@ std::unique_ptr<column> gather_list_leaf(column_view const& column,
   size_type gather_map_size = gd.gather_map_size;
 
   // call the normal gather
-  auto leaf_column = cudf::type_dispatcher<dispatch_storage_type>(
-    column.type(),
-    cudf::detail::column_gatherer{},
-    column,
-    gather_map_begin,
-    gather_map_begin + gather_map_size,
-    // note : we don't need to bother checking for out-of-bounds here since
-    // our inputs at this stage aren't coming from the user.
-    false,
-    stream,
-    mr);
-
-  // the column_gatherer doesn't create the null mask because it expects
-  // that will be done in the gather_bitmask() step.  however, gather_bitmask()
-  // only happens at the root level, and by definition this column is a
-  // leaf.  so we have to generate the bitmask ourselves.
-  // TODO : it might make sense to expose a gather() function that takes a column_view and
-  // returns a column that does this work correctly.
-  size_type null_count = column.null_count();
-  if (null_count > 0) {
-    auto list_cdv = column_device_view::create(column, stream);
-    auto validity = cudf::detail::valid_if(
-      gather_map_begin,
-      gather_map_begin + gd.gather_map_size,
-      [cdv = *list_cdv] __device__(int index) { return cdv.is_valid(index) ? true : false; },
-      stream,
-      mr);
-
-    leaf_column->set_null_mask(std::move(validity.first), validity.second);
-  }
+  // note : we don't need to bother checking for out-of-bounds here since
+  // our inputs at this stage aren't coming from the user.
+  auto gather_table = cudf::detail::gather(cudf::table_view({column}),
+                                           gather_map_begin,
+                                           gather_map_begin + gather_map_size,
+                                           out_of_bounds_policy::DONT_CHECK,
+                                           stream,
+                                           mr);
+  auto leaf_column  = std::move(gather_table->release().front());
+
+  if (column.null_count() == 0) { leaf_column->set_null_mask(rmm::device_buffer{}, 0); }
 
   return leaf_column;
 }
diff --git a/cpp/src/partitioning/partitioning.cu b/cpp/src/partitioning/partitioning.cu
index e4d366e7d01..cbe65354696 100644
--- a/cpp/src/partitioning/partitioning.cu
+++ b/cpp/src/partitioning/partitioning.cu
@@ -17,6 +17,7 @@
 #include <cub/cub.cuh>
 #include <cudf/column/column_factories.hpp>
 #include <cudf/copying.hpp>
+#include <cudf/detail/gather.hpp>
 #include <cudf/detail/nvtx/ranges.hpp>
 #include <cudf/detail/scatter.cuh>
 #include <cudf/detail/utilities/cuda.cuh>
@@ -436,15 +437,13 @@ struct copy_block_partitions_dispatcher {
                                          grid_size,
                                          stream);
 
-    // Use gather instead for non-fixed width types
-    return type_dispatcher(input.type(),
-                           detail::column_gatherer{},
-                           input,
-                           gather_map.begin(),
-                           gather_map.end(),
-                           false,
-                           stream,
-                           mr);
+    auto gather_table = cudf::detail::gather(cudf::table_view({input}),
+                                             gather_map,
+                                             out_of_bounds_policy::DONT_CHECK,
+                                             cudf::detail::negative_index_policy::NOT_ALLOWED,
+                                             stream,
+                                             mr);
+    return std::move(gather_table->release().front());
   }
 };
 

From 72572a8d8235822c683790e222f8158f214fb6d4 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Wed, 26 Oct 2022 13:33:54 -0500
Subject: [PATCH 080/202] Determine if Arrow has S3 support at runtime in unit
 test. (#11560)

Resolves #11559. This PR improves the logic for testing S3 support. Previously this test relied on the value of `CUDF_ENABLE_ARROW_S3`, which only enables S3 support in Arrow if Arrow is being built from source by libcudf. If the Arrow package is found locally (rather than fetched and built), the value of `CUDF_ENABLE_ARROW_S3` was irrelevant. Therefore, the tests using the compile-time value of `CUDF_ENABLE_ARROW_S3` were unable to correctly detect Arrow's S3 support. This PR fixes the problem by checking Arrow S3 support at runtime.

I tested this locally for the case where Arrow doesn't have S3 support (our CI uses prebuilt Arrow packages with S3 enabled).

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Tobias Ribizel (https://github.com/upsj)

URL: https://github.com/rapidsai/cudf/pull/11560
---
 cpp/tests/io/arrow_io_source_test.cpp | 51 +++++++++++++--------------
 1 file changed, 24 insertions(+), 27 deletions(-)

diff --git a/cpp/tests/io/arrow_io_source_test.cpp b/cpp/tests/io/arrow_io_source_test.cpp
index 24964db5f8c..3ef61b0ee26 100644
--- a/cpp/tests/io/arrow_io_source_test.cpp
+++ b/cpp/tests/io/arrow_io_source_test.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -25,6 +25,7 @@
 #include <cudf/io/json.hpp>
 #include <cudf/io/parquet.hpp>
 
+#include <arrow/filesystem/filesystem.h>
 #include <arrow/io/api.h>
 
 #include <fstream>
@@ -61,36 +62,32 @@ TEST_F(ArrowIOTest, URIFileSystem)
   ASSERT_EQ(2, tbl.tbl->num_rows());
 }
 
-#ifdef S3_ENABLED
-
 TEST_F(ArrowIOTest, S3FileSystem)
 {
   std::string s3_uri = "s3://rapidsai-data/cudf/test/tips.parquet?region=us-east-2";
-  std::unique_ptr<cudf::io::arrow_io_source> datasource =
-    std::make_unique<cudf::io::arrow_io_source>(s3_uri);
-
-  // Populate the Parquet Reader Options
-  cudf::io::source_info src(datasource.get());
-  std::vector<std::string> single_column;
-  single_column.insert(single_column.begin(), "total_bill");
-  cudf::io::parquet_reader_options_builder builder(src);
-  cudf::io::parquet_reader_options options = builder.columns(single_column).build();
-
-  // Read the Parquet file from S3
-  cudf::io::table_with_metadata tbl = cudf::io::read_parquet(options);
-
-  ASSERT_EQ(1, tbl.tbl->num_columns());  // Only single column specified in reader_options
-  ASSERT_EQ(244, tbl.tbl->num_rows());   // known number of rows from the S3 file
-}
-
-#else
 
-TEST_F(ArrowIOTest, S3URIWhenNotEnabled)
-{
-  std::string s3_uri = "s3://rapidsai-data/cudf/test/tips.parquet?region=us-east-2";
-  EXPECT_THROW(std::make_unique<cudf::io::arrow_io_source>(s3_uri), cudf::logic_error);
+  // Check to see if Arrow was built with support for S3. If not, ensure this
+  // test throws. If so, validate the S3 file contents.
+  auto const s3_unsupported = arrow::fs::FileSystemFromUri(s3_uri).status().IsNotImplemented();
+  if (s3_unsupported) {
+    EXPECT_THROW(std::make_unique<cudf::io::arrow_io_source>(s3_uri), cudf::logic_error);
+  } else {
+    std::unique_ptr<cudf::io::arrow_io_source> datasource =
+      std::make_unique<cudf::io::arrow_io_source>(s3_uri);
+
+    // Populate the Parquet Reader Options
+    cudf::io::source_info src(datasource.get());
+    std::vector<std::string> single_column;
+    single_column.insert(single_column.begin(), "total_bill");
+    cudf::io::parquet_reader_options_builder builder(src);
+    cudf::io::parquet_reader_options options = builder.columns(single_column).build();
+
+    // Read the Parquet file from S3
+    cudf::io::table_with_metadata tbl = cudf::io::read_parquet(options);
+
+    ASSERT_EQ(1, tbl.tbl->num_columns());  // Only single column specified in reader_options
+    ASSERT_EQ(244, tbl.tbl->num_rows());   // known number of rows from the S3 file
+  }
 }
 
-#endif
-
 CUDF_TEST_PROGRAM_MAIN()

From 07eb7235338699662cddf242ca8877abdf5ca383 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 26 Oct 2022 12:17:57 -0700
Subject: [PATCH 081/202] Feature/remove default streams (#11967)

Default stream parameters can lead to subtle bugs that are hard to track down if public APIs start exposing streams. Removing the defaults ensures that streams are explicitly forwarded everywhere they should be.

This PR partially addresses #9854. It does not change the cases where removing the default value from a stream parameter would necessitate changing the order of parameters in the function signature due to the presence of other default parameters. That work will be done in a follow-up PR.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Tobias Ribizel (https://github.com/upsj)
  - Nghia Truong (https://github.com/ttnghia)
  - Yunsong Wang (https://github.com/PointKernel)

URL: https://github.com/rapidsai/cudf/pull/11967
---
 cpp/benchmarks/common/generate_input.cu       | 21 ++++++----
 cpp/benchmarks/iterator/iterator.cu           |  6 ++-
 cpp/benchmarks/join/join_common.hpp           |  4 +-
 cpp/benchmarks/string/json.cu                 |  3 +-
 cpp/include/cudf/binaryop.hpp                 |  2 +-
 cpp/include/cudf/detail/binaryop.hpp          |  8 ++--
 cpp/include/cudf/detail/concatenate.hpp       |  4 +-
 cpp/include/cudf/detail/copy.hpp              | 40 +++++++++---------
 cpp/include/cudf/detail/copy_if.cuh           |  2 +-
 cpp/include/cudf/detail/copy_range.cuh        |  6 +--
 cpp/include/cudf/detail/datetime.hpp          | 32 +++++++-------
 cpp/include/cudf/detail/fill.hpp              |  4 +-
 cpp/include/cudf/detail/gather.hpp            |  4 +-
 .../detail/groupby/group_replace_nulls.hpp    |  2 +-
 cpp/include/cudf/detail/hashing.hpp           |  2 +-
 cpp/include/cudf/detail/interop.hpp           |  6 +--
 cpp/include/cudf/detail/is_element_valid.hpp  |  2 +-
 cpp/include/cudf/detail/join.hpp              |  2 +-
 cpp/include/cudf/detail/label_bins.hpp        |  2 +-
 cpp/include/cudf/detail/null_mask.hpp         |  4 +-
 cpp/include/cudf/detail/quantiles.hpp         |  2 +-
 .../cudf/detail/reduction_functions.hpp       | 42 +++++++++----------
 cpp/include/cudf/detail/repeat.hpp            |  4 +-
 cpp/include/cudf/detail/replace.hpp           | 14 +++----
 cpp/include/cudf/detail/reshape.hpp           |  4 +-
 cpp/include/cudf/detail/rolling.hpp           |  2 +-
 cpp/include/cudf/detail/round.hpp             |  2 +-
 cpp/include/cudf/detail/scatter.cuh           |  2 +-
 cpp/include/cudf/detail/scatter.hpp           | 10 ++---
 cpp/include/cudf/detail/sequence.hpp          |  6 +--
 cpp/include/cudf/detail/stream_compaction.hpp | 10 ++---
 cpp/include/cudf/detail/structs/utilities.hpp |  4 +-
 cpp/include/cudf/detail/tdigest/tdigest.hpp   |  6 +--
 cpp/include/cudf/detail/transform.hpp         | 16 +++----
 cpp/include/cudf/detail/transpose.hpp         |  2 +-
 cpp/include/cudf/detail/unary.hpp             | 10 ++---
 cpp/include/cudf/detail/utilities/cuda.cuh    |  3 +-
 .../detail/utilities/vector_factories.hpp     | 21 +++++-----
 cpp/include/cudf/detail/valid_if.cuh          |  2 +-
 .../cudf/dictionary/detail/concatenate.hpp    |  2 +-
 cpp/include/cudf/dictionary/detail/encode.hpp |  2 +-
 .../cudf/dictionary/detail/replace.hpp        |  4 +-
 cpp/include/cudf/dictionary/detail/search.hpp |  4 +-
 .../cudf/dictionary/detail/update_keys.hpp    | 12 +++---
 cpp/include/cudf/io/detail/avro.hpp           |  2 +-
 cpp/include/cudf/io/detail/csv.hpp            |  2 +-
 cpp/include/cudf/io/detail/json.hpp           |  2 +-
 cpp/include/cudf/io/detail/orc.hpp            |  3 +-
 cpp/include/cudf/lists/detail/concatenate.hpp |  2 +-
 cpp/include/cudf/lists/detail/gather.cuh      |  3 +-
 cpp/include/cudf/lists/detail/scatter.cuh     |  6 +--
 .../cudf/lists/lists_column_factories.hpp     |  2 +-
 cpp/include/cudf/strings/detail/combine.hpp   |  5 ++-
 .../cudf/strings/detail/concatenate.hpp       |  2 +-
 cpp/include/cudf/strings/detail/copying.hpp   |  3 +-
 cpp/include/cudf/strings/detail/fill.hpp      |  2 +-
 cpp/include/cudf/strings/detail/json.hpp      |  2 +-
 cpp/include/cudf/strings/detail/replace.hpp   | 15 ++++---
 cpp/include/cudf/strings/detail/scatter.cuh   |  2 +-
 cpp/include/cudf/strings/detail/utilities.cuh |  6 +--
 cpp/include/cudf/strings/detail/utilities.hpp |  4 +-
 cpp/include/cudf_test/column_wrapper.hpp      | 14 +++----
 cpp/include/cudf_test/tdigest_utilities.cuh   | 10 ++---
 cpp/include/nvtext/detail/tokenize.hpp        | 14 ++++---
 cpp/src/bitmask/null_mask.cu                  |  2 +-
 cpp/src/copying/purge_nonempty_nulls.cu       |  5 ++-
 cpp/src/dictionary/remove_keys.cu             |  6 +--
 cpp/src/hash/concurrent_unordered_map.cuh     | 13 +++---
 cpp/src/io/functions.cpp                      |  2 +-
 cpp/src/io/json/json_column.cu                |  2 +-
 cpp/src/join/conditional_join.hpp             |  2 +-
 cpp/src/lists/combine/concatenate_rows.cu     |  6 ++-
 cpp/src/reductions/reductions.cpp             |  4 +-
 cpp/src/stream_compaction/distinct_count.cu   |  2 +-
 cpp/src/stream_compaction/unique_count.cu     |  2 +-
 cpp/src/strings/replace/replace_re.cu         |  2 +-
 cpp/src/strings/strings_column_factories.cu   |  2 +-
 cpp/src/structs/utilities.cpp                 |  4 +-
 cpp/tests/bitmask/bitmask_tests.cpp           |  3 +-
 cpp/tests/bitmask/is_element_valid_tests.cpp  | 29 ++++++-------
 cpp/tests/bitmask/valid_if_tests.cu           | 33 +++++++++------
 cpp/tests/copying/detail_gather_tests.cu      |  3 +-
 cpp/tests/copying/gather_list_tests.cpp       |  3 +-
 cpp/tests/copying/gather_str_tests.cpp        | 12 ++++--
 .../copying/purge_nonempty_nulls_tests.cpp    | 15 ++++---
 .../copying/segmented_gather_list_tests.cpp   |  5 ++-
 .../device_atomics/device_atomics_test.cu     | 13 +++---
 cpp/tests/dictionary/search_test.cpp          | 14 ++++---
 cpp/tests/fixed_point/fixed_point_tests.cu    |  6 +--
 cpp/tests/groupby/tdigest_tests.cu            |  6 +--
 cpp/tests/hash_map/map_test.cu                |  2 +-
 cpp/tests/iterator/iterator_tests.cuh         |  5 ++-
 cpp/tests/iterator/value_iterator_test.cuh    |  2 +-
 .../iterator/value_iterator_test_strings.cu   |  8 ++--
 cpp/tests/join/join_tests.cpp                 |  6 ++-
 cpp/tests/lists/contains_tests.cpp            | 10 +++--
 cpp/tests/lists/extract_tests.cpp             |  3 +-
 .../partitioning/hash_partition_test.cpp      |  3 +-
 cpp/tests/quantiles/percentile_approx_test.cu | 21 ++++++----
 cpp/tests/replace/replace_nulls_tests.cpp     | 14 ++++---
 cpp/tests/scalar/scalar_device_view_test.cu   |  2 +-
 cpp/tests/strings/concatenate_tests.cpp       |  6 +--
 cpp/tests/strings/contains_tests.cpp          |  4 +-
 cpp/tests/strings/factories_test.cu           | 11 ++---
 cpp/tests/strings/fill_tests.cpp              | 30 +++++++++----
 cpp/tests/strings/integers_tests.cpp          |  6 +--
 cpp/tests/structs/utilities_tests.cpp         | 35 +++++++++-------
 cpp/tests/table/table_view_tests.cu           |  3 +-
 cpp/tests/types/type_dispatcher_test.cu       |  4 +-
 cpp/tests/utilities/column_utilities.cu       |  5 ++-
 java/src/main/native/src/ColumnVectorJni.cpp  |  5 ++-
 java/src/main/native/src/ColumnViewJni.cpp    |  8 ++--
 java/src/main/native/src/ColumnViewJni.cu     | 12 +++---
 java/src/main/native/src/ScalarJni.cpp        |  4 +-
 114 files changed, 463 insertions(+), 385 deletions(-)

diff --git a/cpp/benchmarks/common/generate_input.cu b/cpp/benchmarks/common/generate_input.cu
index 50adab71200..dee7e2b8586 100644
--- a/cpp/benchmarks/common/generate_input.cu
+++ b/cpp/benchmarks/common/generate_input.cu
@@ -429,8 +429,8 @@ std::unique_ptr<cudf::column> create_random_column(data_profile const& profile,
                    null_mask.begin());
   }
 
-  auto [result_bitmask, null_count] =
-    cudf::detail::valid_if(null_mask.begin(), null_mask.end(), thrust::identity<bool>{});
+  auto [result_bitmask, null_count] = cudf::detail::valid_if(
+    null_mask.begin(), null_mask.end(), thrust::identity<bool>{}, cudf::get_default_stream());
 
   return std::make_unique<cudf::column>(
     dtype,
@@ -508,8 +508,8 @@ std::unique_ptr<cudf::column> create_random_utf8_string_column(data_profile cons
                      thrust::make_zip_iterator(offsets.begin(), offsets.begin() + 1),
                      num_rows,
                      string_generator{chars.data(), engine});
-  auto [result_bitmask, null_count] =
-    cudf::detail::valid_if(null_mask.begin(), null_mask.end() - 1, thrust::identity<bool>{});
+  auto [result_bitmask, null_count] = cudf::detail::valid_if(
+    null_mask.begin(), null_mask.end() - 1, thrust::identity<bool>{}, cudf::get_default_stream());
   return cudf::make_strings_column(
     num_rows,
     std::move(offsets),
@@ -541,7 +541,8 @@ std::unique_ptr<cudf::column> create_random_column<cudf::string_view>(data_profi
   auto str_table      = cudf::detail::gather(cudf::table_view{{sample_strings->view()}},
                                         sample_indices,
                                         cudf::out_of_bounds_policy::DONT_CHECK,
-                                        cudf::detail::negative_index_policy::NOT_ALLOWED);
+                                        cudf::detail::negative_index_policy::NOT_ALLOWED,
+                                        cudf::get_default_stream());
   return std::move(str_table->release()[0]);
 }
 
@@ -625,7 +626,8 @@ std::unique_ptr<cudf::column> create_random_column<cudf::struct_view>(data_profi
       auto [null_mask, null_count] = [&]() {
         if (profile.get_null_probability().has_value()) {
           auto valids = valid_dist(engine, num_rows);
-          return cudf::detail::valid_if(valids.begin(), valids.end(), thrust::identity<bool>{});
+          return cudf::detail::valid_if(
+            valids.begin(), valids.end(), thrust::identity<bool>{}, cudf::get_default_stream());
         }
         return std::pair<rmm::device_buffer, cudf::size_type>{};
       }();
@@ -708,8 +710,8 @@ std::unique_ptr<cudf::column> create_random_column<cudf::list_view>(data_profile
     auto offsets_column = std::make_unique<cudf::column>(
       cudf::data_type{cudf::type_id::INT32}, num_rows + 1, offsets.release());
 
-    auto [null_mask, null_count] =
-      cudf::detail::valid_if(valids.begin(), valids.end(), thrust::identity<bool>{});
+    auto [null_mask, null_count] = cudf::detail::valid_if(
+      valids.begin(), valids.end(), thrust::identity<bool>{}, cudf::get_default_stream());
     list_column = cudf::make_lists_column(
       num_rows,
       std::move(offsets_column),
@@ -835,7 +837,8 @@ std::pair<rmm::device_buffer, cudf::size_type> create_random_null_mask(
   } else {
     return cudf::detail::valid_if(thrust::make_counting_iterator<cudf::size_type>(0),
                                   thrust::make_counting_iterator<cudf::size_type>(size),
-                                  bool_generator{seed, 1.0 - *null_probability});
+                                  bool_generator{seed, 1.0 - *null_probability},
+                                  cudf::get_default_stream());
   }
 }
 
diff --git a/cpp/benchmarks/iterator/iterator.cu b/cpp/benchmarks/iterator/iterator.cu
index 381cbe4824b..73060200d00 100644
--- a/cpp/benchmarks/iterator/iterator.cu
+++ b/cpp/benchmarks/iterator/iterator.cu
@@ -140,7 +140,8 @@ void BM_iterator(benchmark::State& state)
   cudf::column_view hasnull_F = wrap_hasnull_F;
 
   // Initialize dev_result to false
-  auto dev_result = cudf::detail::make_zeroed_device_uvector_sync<TypeParam>(1);
+  auto dev_result =
+    cudf::detail::make_zeroed_device_uvector_sync<TypeParam>(1, cudf::get_default_stream());
   for (auto _ : state) {
     cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
     if (cub_or_thrust) {
@@ -208,7 +209,8 @@ void BM_pair_iterator(benchmark::State& state)
   cudf::column_view hasnull_T = wrap_hasnull_T;
 
   // Initialize dev_result to false
-  auto dev_result = cudf::detail::make_zeroed_device_uvector_sync<thrust::pair<T, bool>>(1);
+  auto dev_result = cudf::detail::make_zeroed_device_uvector_sync<thrust::pair<T, bool>>(
+    1, cudf::get_default_stream());
   for (auto _ : state) {
     cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
     if (cub_or_thrust) {
diff --git a/cpp/benchmarks/join/join_common.hpp b/cpp/benchmarks/join/join_common.hpp
index d4fb0862506..ad288edb169 100644
--- a/cpp/benchmarks/join/join_common.hpp
+++ b/cpp/benchmarks/join/join_common.hpp
@@ -86,7 +86,9 @@ static void BM_join(state_type& state, Join JoinFunc)
     // roughly 75% nulls
     auto validity =
       thrust::make_transform_iterator(thrust::make_counting_iterator(0), null75_generator{});
-    return cudf::detail::valid_if(validity, validity + size, thrust::identity<bool>{}).first;
+    return cudf::detail::valid_if(
+             validity, validity + size, thrust::identity<bool>{}, cudf::get_default_stream())
+      .first;
   };
 
   std::unique_ptr<cudf::column> build_key_column0 = [&]() {
diff --git a/cpp/benchmarks/string/json.cu b/cpp/benchmarks/string/json.cu
index 5ee56c3cdae..87528608cc7 100644
--- a/cpp/benchmarks/string/json.cu
+++ b/cpp/benchmarks/string/json.cu
@@ -177,7 +177,8 @@ auto build_json_string_column(int desired_bytes, int num_rows)
   auto d_store_order = cudf::column_device_view::create(float_2bool_columns->get_column(2));
   json_benchmark_row_builder jb{
     desired_bytes, num_rows, {*d_books, *d_bicycles}, *d_book_pct, *d_misc_order, *d_store_order};
-  auto children = cudf::strings::detail::make_strings_children(jb, num_rows);
+  auto children =
+    cudf::strings::detail::make_strings_children(jb, num_rows, cudf::get_default_stream());
   return cudf::make_strings_column(
     num_rows, std::move(children.first), std::move(children.second), 0, {});
 }
diff --git a/cpp/include/cudf/binaryop.hpp b/cpp/include/cudf/binaryop.hpp
index 554a38e03e5..fabe0d86fc4 100644
--- a/cpp/include/cudf/binaryop.hpp
+++ b/cpp/include/cudf/binaryop.hpp
@@ -255,7 +255,7 @@ void apply_sorting_struct_binary_op(mutable_column_view& out,
                                     bool is_lhs_scalar,
                                     bool is_rhs_scalar,
                                     binary_operator op,
-                                    rmm::cuda_stream_view stream = cudf::get_default_stream());
+                                    rmm::cuda_stream_view stream);
 }  // namespace detail
 }  // namespace compiled
 }  // namespace binops
diff --git a/cpp/include/cudf/detail/binaryop.hpp b/cpp/include/cudf/detail/binaryop.hpp
index 944f2eef743..ffd8be971ab 100644
--- a/cpp/include/cudf/detail/binaryop.hpp
+++ b/cpp/include/cudf/detail/binaryop.hpp
@@ -35,7 +35,7 @@ std::unique_ptr<column> binary_operation(
   column_view const& rhs,
   std::string const& ptx,
   data_type output_type,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -49,7 +49,7 @@ std::unique_ptr<column> binary_operation(
   column_view const& rhs,
   binary_operator op,
   data_type output_type,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -63,7 +63,7 @@ std::unique_ptr<column> binary_operation(
   scalar const& rhs,
   binary_operator op,
   data_type output_type,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -77,7 +77,7 @@ std::unique_ptr<column> binary_operation(
   column_view const& rhs,
   binary_operator op,
   data_type output_type,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 }  // namespace detail
 }  // namespace cudf
diff --git a/cpp/include/cudf/detail/concatenate.hpp b/cpp/include/cudf/detail/concatenate.hpp
index ae5c95c4645..925029597a6 100644
--- a/cpp/include/cudf/detail/concatenate.hpp
+++ b/cpp/include/cudf/detail/concatenate.hpp
@@ -35,7 +35,7 @@ namespace detail {
  */
 std::unique_ptr<column> concatenate(
   host_span<column_view const> columns_to_concat,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -45,7 +45,7 @@ std::unique_ptr<column> concatenate(
  */
 std::unique_ptr<table> concatenate(
   host_span<table_view const> tables_to_concat,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/copy.hpp b/cpp/include/cudf/detail/copy.hpp
index 23ed9090f05..22453315762 100644
--- a/cpp/include/cudf/detail/copy.hpp
+++ b/cpp/include/cudf/detail/copy.hpp
@@ -77,7 +77,7 @@ ColumnView slice(ColumnView const& input, cudf::size_type begin, cudf::size_type
  */
 std::vector<column_view> slice(column_view const& input,
                                host_span<size_type const> indices,
-                               rmm::cuda_stream_view stream = cudf::get_default_stream());
+                               rmm::cuda_stream_view stream);
 /**
  * @copydoc cudf::slice(column_view const&, std::initializer_list<size_type>)
  *
@@ -85,7 +85,7 @@ std::vector<column_view> slice(column_view const& input,
  */
 std::vector<column_view> slice(column_view const& input,
                                std::initializer_list<size_type> indices,
-                               rmm::cuda_stream_view stream = cudf::get_default_stream());
+                               rmm::cuda_stream_view stream);
 
 /**
  * @copydoc cudf::slice(table_view const&, host_span<size_type const>)
@@ -94,7 +94,7 @@ std::vector<column_view> slice(column_view const& input,
  */
 std::vector<table_view> slice(table_view const& input,
                               host_span<size_type const> indices,
-                              rmm::cuda_stream_view stream = cudf::get_default_stream());
+                              rmm::cuda_stream_view stream);
 /**
  * @copydoc cudf::slice(table_view const&, std::initializer_list<size_type>)
  *
@@ -102,7 +102,7 @@ std::vector<table_view> slice(table_view const& input,
  */
 std::vector<table_view> slice(table_view const& input,
                               std::initializer_list<size_type> indices,
-                              rmm::cuda_stream_view stream = cudf::get_default_stream());
+                              rmm::cuda_stream_view stream);
 
 /**
  * @copydoc cudf::split(column_view const&, host_span<size_type const>)
@@ -111,7 +111,7 @@ std::vector<table_view> slice(table_view const& input,
  */
 std::vector<column_view> split(column_view const& input,
                                host_span<size_type const> splits,
-                               rmm::cuda_stream_view stream = cudf::get_default_stream());
+                               rmm::cuda_stream_view stream);
 /**
  * @copydoc cudf::split(column_view const&, std::initializer_list<size_type>)
  *
@@ -119,7 +119,7 @@ std::vector<column_view> split(column_view const& input,
  */
 std::vector<column_view> split(column_view const& input,
                                std::initializer_list<size_type> splits,
-                               rmm::cuda_stream_view stream = cudf::get_default_stream());
+                               rmm::cuda_stream_view stream);
 
 /**
  * @copydoc cudf::split(table_view const&, host_span<size_type const>)
@@ -128,7 +128,7 @@ std::vector<column_view> split(column_view const& input,
  */
 std::vector<table_view> split(table_view const& input,
                               host_span<size_type const> splits,
-                              rmm::cuda_stream_view stream = cudf::get_default_stream());
+                              rmm::cuda_stream_view stream);
 /**
  * @copydoc cudf::split(table_view const&, std::initializer_list<size_type>)
  *
@@ -136,7 +136,7 @@ std::vector<table_view> split(table_view const& input,
  */
 std::vector<table_view> split(table_view const& input,
                               std::initializer_list<size_type> splits,
-                              rmm::cuda_stream_view stream = cudf::get_default_stream());
+                              rmm::cuda_stream_view stream);
 
 /**
  * @copydoc cudf::shift(column_view const&,size_type,scalar const&,
@@ -148,7 +148,7 @@ std::unique_ptr<column> shift(
   column_view const& input,
   size_type offset,
   scalar const& fill_value,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -189,7 +189,7 @@ std::unique_ptr<column> segmented_shift(
   device_span<size_type const> segment_offsets,
   size_type offset,
   scalar const& fill_value,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -200,7 +200,7 @@ std::unique_ptr<column> segmented_shift(
 std::vector<packed_table> contiguous_split(
   cudf::table_view const& input,
   std::vector<size_type> const& splits,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -209,7 +209,7 @@ std::vector<packed_table> contiguous_split(
  * @param stream Optional CUDA stream on which to execute kernels
  **/
 packed_columns pack(cudf::table_view const& input,
-                    rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+                    rmm::cuda_stream_view stream,
                     rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -235,7 +235,7 @@ std::unique_ptr<column> copy_if_else(
   column_view const& lhs,
   column_view const& rhs,
   column_view const& boolean_mask,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -248,7 +248,7 @@ std::unique_ptr<column> copy_if_else(
   scalar const& lhs,
   column_view const& rhs,
   column_view const& boolean_mask,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -261,7 +261,7 @@ std::unique_ptr<column> copy_if_else(
   column_view const& lhs,
   scalar const& rhs,
   column_view const& boolean_mask,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -274,7 +274,7 @@ std::unique_ptr<column> copy_if_else(
   scalar const& lhs,
   scalar const& rhs,
   column_view const& boolean_mask,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -298,7 +298,7 @@ std::unique_ptr<table> sample(
 std::unique_ptr<scalar> get_element(
   column_view const& input,
   size_type index,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -306,16 +306,14 @@ std::unique_ptr<scalar> get_element(
  *
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
-bool has_nonempty_nulls(column_view const& input,
-                        rmm::cuda_stream_view stream = cudf::get_default_stream());
+bool has_nonempty_nulls(column_view const& input, rmm::cuda_stream_view stream);
 
 /**
  * @copydoc cudf::may_have_nonempty_nulls
  *
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
-bool may_have_nonempty_nulls(column_view const& input,
-                             rmm::cuda_stream_view stream = cudf::get_default_stream());
+bool may_have_nonempty_nulls(column_view const& input, rmm::cuda_stream_view stream);
 
 }  // namespace detail
 }  // namespace cudf
diff --git a/cpp/include/cudf/detail/copy_if.cuh b/cpp/include/cudf/detail/copy_if.cuh
index 229d96659df..6eea72a1e0d 100644
--- a/cpp/include/cudf/detail/copy_if.cuh
+++ b/cpp/include/cudf/detail/copy_if.cuh
@@ -323,7 +323,7 @@ template <typename Filter>
 std::unique_ptr<table> copy_if(
   table_view const& input,
   Filter filter,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   CUDF_FUNC_RANGE();
diff --git a/cpp/include/cudf/detail/copy_range.cuh b/cpp/include/cudf/detail/copy_range.cuh
index 09cbf706d5c..22714e97dfa 100644
--- a/cpp/include/cudf/detail/copy_range.cuh
+++ b/cpp/include/cudf/detail/copy_range.cuh
@@ -135,7 +135,7 @@ void copy_range(SourceValueIterator source_value_begin,
                 mutable_column_view& target,
                 size_type target_begin,
                 size_type target_end,
-                rmm::cuda_stream_view stream = cudf::get_default_stream())
+                rmm::cuda_stream_view stream)
 {
   CUDF_EXPECTS((target_begin <= target_end) && (target_begin >= 0) &&
                  (target_begin < target.size()) && (target_end <= target.size()),
@@ -196,7 +196,7 @@ void copy_range_in_place(column_view const& source,
                          size_type source_begin,
                          size_type source_end,
                          size_type target_begin,
-                         rmm::cuda_stream_view stream = cudf::get_default_stream());
+                         rmm::cuda_stream_view stream);
 
 /**
  * @copydoc cudf::copy_range
@@ -209,7 +209,7 @@ std::unique_ptr<column> copy_range(
   size_type source_begin,
   size_type source_end,
   size_type target_begin,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/datetime.hpp b/cpp/include/cudf/detail/datetime.hpp
index d2bca74ee9b..c2e3c32b65f 100644
--- a/cpp/include/cudf/detail/datetime.hpp
+++ b/cpp/include/cudf/detail/datetime.hpp
@@ -31,7 +31,7 @@ namespace detail {
  */
 std::unique_ptr<cudf::column> extract_year(
   cudf::column_view const& column,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -41,7 +41,7 @@ std::unique_ptr<cudf::column> extract_year(
  */
 std::unique_ptr<cudf::column> extract_month(
   cudf::column_view const& column,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -51,7 +51,7 @@ std::unique_ptr<cudf::column> extract_month(
  */
 std::unique_ptr<cudf::column> extract_day(
   cudf::column_view const& column,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -61,7 +61,7 @@ std::unique_ptr<cudf::column> extract_day(
  */
 std::unique_ptr<cudf::column> extract_weekday(
   cudf::column_view const& column,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -71,7 +71,7 @@ std::unique_ptr<cudf::column> extract_weekday(
  */
 std::unique_ptr<cudf::column> extract_hour(
   cudf::column_view const& column,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -81,7 +81,7 @@ std::unique_ptr<cudf::column> extract_hour(
  */
 std::unique_ptr<cudf::column> extract_minute(
   cudf::column_view const& column,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -91,7 +91,7 @@ std::unique_ptr<cudf::column> extract_minute(
  */
 std::unique_ptr<cudf::column> extract_second(
   cudf::column_view const& column,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -102,7 +102,7 @@ std::unique_ptr<cudf::column> extract_second(
  */
 std::unique_ptr<cudf::column> extract_millisecond_fraction(
   cudf::column_view const& column,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -113,7 +113,7 @@ std::unique_ptr<cudf::column> extract_millisecond_fraction(
  */
 std::unique_ptr<cudf::column> extract_microsecond_fraction(
   cudf::column_view const& column,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -124,7 +124,7 @@ std::unique_ptr<cudf::column> extract_microsecond_fraction(
  */
 std::unique_ptr<cudf::column> extract_nanosecond_fraction(
   cudf::column_view const& column,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -134,7 +134,7 @@ std::unique_ptr<cudf::column> extract_nanosecond_fraction(
  */
 std::unique_ptr<cudf::column> last_day_of_month(
   cudf::column_view const& column,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -144,7 +144,7 @@ std::unique_ptr<cudf::column> last_day_of_month(
  */
 std::unique_ptr<cudf::column> day_of_year(
   cudf::column_view const& column,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -156,7 +156,7 @@ std::unique_ptr<cudf::column> day_of_year(
 std::unique_ptr<cudf::column> add_calendrical_months(
   cudf::column_view const& timestamps,
   cudf::column_view const& months,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -168,7 +168,7 @@ std::unique_ptr<cudf::column> add_calendrical_months(
 std::unique_ptr<cudf::column> add_calendrical_months(
   cudf::column_view const& timestamps,
   cudf::scalar const& months,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -178,12 +178,12 @@ std::unique_ptr<cudf::column> add_calendrical_months(
  */
 std::unique_ptr<cudf::column> is_leap_year(
   cudf::column_view const& column,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 std::unique_ptr<cudf::column> extract_quarter(
   cudf::column_view const& column,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/fill.hpp b/cpp/include/cudf/detail/fill.hpp
index 3ac62c984fb..e34acfff6b9 100644
--- a/cpp/include/cudf/detail/fill.hpp
+++ b/cpp/include/cudf/detail/fill.hpp
@@ -36,7 +36,7 @@ void fill_in_place(mutable_column_view& destination,
                    size_type begin,
                    size_type end,
                    scalar const& value,
-                   rmm::cuda_stream_view stream = cudf::get_default_stream());
+                   rmm::cuda_stream_view stream);
 
 /**
  * @copydoc cudf::fill
@@ -48,7 +48,7 @@ std::unique_ptr<column> fill(
   size_type begin,
   size_type end,
   scalar const& value,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/gather.hpp b/cpp/include/cudf/detail/gather.hpp
index 2f6a9525b4e..9d61a8de184 100644
--- a/cpp/include/cudf/detail/gather.hpp
+++ b/cpp/include/cudf/detail/gather.hpp
@@ -66,7 +66,7 @@ std::unique_ptr<table> gather(
   column_view const& gather_map,
   out_of_bounds_policy bounds_policy,
   negative_index_policy neg_indices,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -81,7 +81,7 @@ std::unique_ptr<table> gather(
   device_span<size_type const> const gather_map,
   out_of_bounds_policy bounds_policy,
   negative_index_policy neg_indices,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/groupby/group_replace_nulls.hpp b/cpp/include/cudf/detail/groupby/group_replace_nulls.hpp
index 6742e7d9159..9e64048b7b4 100644
--- a/cpp/include/cudf/detail/groupby/group_replace_nulls.hpp
+++ b/cpp/include/cudf/detail/groupby/group_replace_nulls.hpp
@@ -40,7 +40,7 @@ std::unique_ptr<column> group_replace_nulls(
   cudf::column_view const& grouped_value,
   device_span<size_type const> group_labels,
   cudf::replace_policy replace_policy,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/hashing.hpp b/cpp/include/cudf/detail/hashing.hpp
index 98d3713c5c5..b7469d80a8d 100644
--- a/cpp/include/cudf/detail/hashing.hpp
+++ b/cpp/include/cudf/detail/hashing.hpp
@@ -52,7 +52,7 @@ std::unique_ptr<column> spark_murmur_hash3_32(
 
 std::unique_ptr<column> md5_hash(
   table_view const& input,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /* Copyright 2005-2014 Daniel James.
diff --git a/cpp/include/cudf/detail/interop.hpp b/cpp/include/cudf/detail/interop.hpp
index 3d22530f5b3..2215f052113 100644
--- a/cpp/include/cudf/detail/interop.hpp
+++ b/cpp/include/cudf/detail/interop.hpp
@@ -34,7 +34,7 @@ namespace detail {
  */
 std::unique_ptr<table> from_dlpack(
   DLManagedTensor const* managed_tensor,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -44,7 +44,7 @@ std::unique_ptr<table> from_dlpack(
  */
 DLManagedTensor* to_dlpack(
   table_view const& input,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 // Creating arrow as per given type_id and buffer arguments
@@ -114,7 +114,7 @@ std::shared_ptr<arrow::Table> to_arrow(table_view input,
  */
 std::unique_ptr<table> from_arrow(
   arrow::Table const& input_table,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/is_element_valid.hpp b/cpp/include/cudf/detail/is_element_valid.hpp
index e70fa8cfe5f..72a85d42eb3 100644
--- a/cpp/include/cudf/detail/is_element_valid.hpp
+++ b/cpp/include/cudf/detail/is_element_valid.hpp
@@ -41,7 +41,7 @@ namespace detail {
 
 bool is_element_valid_sync(column_view const& col_view,
                            size_type element_index,
-                           rmm::cuda_stream_view stream = cudf::get_default_stream());
+                           rmm::cuda_stream_view stream);
 
 }  // namespace detail
 }  // namespace cudf
diff --git a/cpp/include/cudf/detail/join.hpp b/cpp/include/cudf/detail/join.hpp
index 51cda214f7b..2dfe31091ac 100644
--- a/cpp/include/cudf/detail/join.hpp
+++ b/cpp/include/cudf/detail/join.hpp
@@ -91,7 +91,7 @@ struct hash_join {
    */
   hash_join(cudf::table_view const& build,
             cudf::null_equality compare_nulls,
-            rmm::cuda_stream_view stream = cudf::get_default_stream());
+            rmm::cuda_stream_view stream);
 
   /**
    * @copydoc cudf::hash_join::inner_join
diff --git a/cpp/include/cudf/detail/label_bins.hpp b/cpp/include/cudf/detail/label_bins.hpp
index af9f5fb82f5..f556c81c371 100644
--- a/cpp/include/cudf/detail/label_bins.hpp
+++ b/cpp/include/cudf/detail/label_bins.hpp
@@ -51,7 +51,7 @@ std::unique_ptr<column> label_bins(
   inclusive left_inclusive,
   column_view const& right_edges,
   inclusive right_inclusive,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /** @} */  // end of group
diff --git a/cpp/include/cudf/detail/null_mask.hpp b/cpp/include/cudf/detail/null_mask.hpp
index f75e3b06ccf..a0e04d7b215 100644
--- a/cpp/include/cudf/detail/null_mask.hpp
+++ b/cpp/include/cudf/detail/null_mask.hpp
@@ -34,7 +34,7 @@ namespace detail {
 rmm::device_buffer create_null_mask(
   size_type size,
   mask_state state,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -46,7 +46,7 @@ void set_null_mask(bitmask_type* bitmask,
                    size_type begin_bit,
                    size_type end_bit,
                    bool valid,
-                   rmm::cuda_stream_view stream = cudf::get_default_stream());
+                   rmm::cuda_stream_view stream);
 
 /**
  * @brief Given a bitmask, counts the number of set (1) bits in the range
diff --git a/cpp/include/cudf/detail/quantiles.hpp b/cpp/include/cudf/detail/quantiles.hpp
index c75b2d135d8..752f8ef6367 100644
--- a/cpp/include/cudf/detail/quantiles.hpp
+++ b/cpp/include/cudf/detail/quantiles.hpp
@@ -62,7 +62,7 @@ std::unique_ptr<table> quantiles(
 std::unique_ptr<column> percentile_approx(
   tdigest::tdigest_column_view const& input,
   column_view const& percentiles,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/reduction_functions.hpp b/cpp/include/cudf/detail/reduction_functions.hpp
index fa6652b0db3..a2de286f283 100644
--- a/cpp/include/cudf/detail/reduction_functions.hpp
+++ b/cpp/include/cudf/detail/reduction_functions.hpp
@@ -46,7 +46,7 @@ std::unique_ptr<scalar> sum(
   column_view const& col,
   data_type const output_dtype,
   std::optional<std::reference_wrapper<scalar const>> init,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -67,7 +67,7 @@ std::unique_ptr<scalar> min(
   column_view const& col,
   data_type const output_dtype,
   std::optional<std::reference_wrapper<scalar const>> init,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -88,7 +88,7 @@ std::unique_ptr<scalar> max(
   column_view const& col,
   data_type const output_dtype,
   std::optional<std::reference_wrapper<scalar const>> init,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -110,7 +110,7 @@ std::unique_ptr<scalar> any(
   column_view const& col,
   data_type const output_dtype,
   std::optional<std::reference_wrapper<scalar const>> init,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -132,7 +132,7 @@ std::unique_ptr<scalar> all(
   column_view const& col,
   data_type const output_dtype,
   std::optional<std::reference_wrapper<scalar const>> init,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -154,7 +154,7 @@ std::unique_ptr<scalar> product(
   column_view const& col,
   data_type const output_dtype,
   std::optional<std::reference_wrapper<scalar const>> init,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -174,7 +174,7 @@ std::unique_ptr<scalar> product(
 std::unique_ptr<scalar> sum_of_squares(
   column_view const& col,
   data_type const output_dtype,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -194,7 +194,7 @@ std::unique_ptr<scalar> sum_of_squares(
 std::unique_ptr<scalar> mean(
   column_view const& col,
   data_type const output_dtype,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -217,7 +217,7 @@ std::unique_ptr<scalar> variance(
   column_view const& col,
   data_type const output_dtype,
   cudf::size_type ddof,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -240,7 +240,7 @@ std::unique_ptr<scalar> standard_deviation(
   column_view const& col,
   data_type const output_dtype,
   cudf::size_type ddof,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -271,7 +271,7 @@ std::unique_ptr<scalar> nth_element(
   column_view const& col,
   size_type n,
   null_policy null_handling,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -286,7 +286,7 @@ std::unique_ptr<scalar> nth_element(
 std::unique_ptr<scalar> collect_list(
   column_view const& col,
   null_policy null_handling,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -299,7 +299,7 @@ std::unique_ptr<scalar> collect_list(
  */
 std::unique_ptr<scalar> merge_lists(
   lists_column_view const& col,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -318,7 +318,7 @@ std::unique_ptr<scalar> collect_set(
   null_policy null_handling,
   null_equality nulls_equal,
   nan_equality nans_equal,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -335,7 +335,7 @@ std::unique_ptr<scalar> merge_sets(
   lists_column_view const& col,
   null_equality nulls_equal,
   nan_equality nans_equal,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -363,7 +363,7 @@ std::unique_ptr<column> segmented_sum(
   data_type const output_dtype,
   null_policy null_handling,
   std::optional<std::reference_wrapper<scalar const>> init,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -391,7 +391,7 @@ std::unique_ptr<column> segmented_product(
   data_type const output_dtype,
   null_policy null_handling,
   std::optional<std::reference_wrapper<scalar const>> init,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -418,7 +418,7 @@ std::unique_ptr<column> segmented_min(
   data_type const output_dtype,
   null_policy null_handling,
   std::optional<std::reference_wrapper<scalar const>> init,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -445,7 +445,7 @@ std::unique_ptr<column> segmented_max(
   data_type const output_dtype,
   null_policy null_handling,
   std::optional<std::reference_wrapper<scalar const>> init,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -473,7 +473,7 @@ std::unique_ptr<column> segmented_any(
   data_type const output_dtype,
   null_policy null_handling,
   std::optional<std::reference_wrapper<scalar const>> init,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -501,7 +501,7 @@ std::unique_ptr<column> segmented_all(
   data_type const output_dtype,
   null_policy null_handling,
   std::optional<std::reference_wrapper<scalar const>> init,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace reduction
diff --git a/cpp/include/cudf/detail/repeat.hpp b/cpp/include/cudf/detail/repeat.hpp
index 39a0de1bd31..69d9705556f 100644
--- a/cpp/include/cudf/detail/repeat.hpp
+++ b/cpp/include/cudf/detail/repeat.hpp
@@ -36,7 +36,7 @@ std::unique_ptr<table> repeat(
   table_view const& input_table,
   column_view const& count,
   bool check_count,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -48,7 +48,7 @@ std::unique_ptr<table> repeat(
 std::unique_ptr<table> repeat(
   table_view const& input_table,
   size_type count,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/replace.hpp b/cpp/include/cudf/detail/replace.hpp
index 8e6e0729d07..9721c6e9849 100644
--- a/cpp/include/cudf/detail/replace.hpp
+++ b/cpp/include/cudf/detail/replace.hpp
@@ -34,7 +34,7 @@ namespace detail {
 std::unique_ptr<column> replace_nulls(
   column_view const& input,
   cudf::column_view const& replacement,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -46,7 +46,7 @@ std::unique_ptr<column> replace_nulls(
 std::unique_ptr<column> replace_nulls(
   column_view const& input,
   scalar const& replacement,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -58,7 +58,7 @@ std::unique_ptr<column> replace_nulls(
 std::unique_ptr<column> replace_nulls(
   column_view const& input,
   replace_policy const& replace_policy,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -70,7 +70,7 @@ std::unique_ptr<column> replace_nulls(
 std::unique_ptr<column> replace_nans(
   column_view const& input,
   column_view const& replacement,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -82,7 +82,7 @@ std::unique_ptr<column> replace_nans(
 std::unique_ptr<column> replace_nans(
   column_view const& input,
   scalar const& replacement,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -94,7 +94,7 @@ std::unique_ptr<column> find_and_replace_all(
   column_view const& input_col,
   column_view const& values_to_replace,
   column_view const& replacement_values,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -104,7 +104,7 @@ std::unique_ptr<column> find_and_replace_all(
  */
 std::unique_ptr<column> normalize_nans_and_zeros(
   column_view const& input,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/reshape.hpp b/cpp/include/cudf/detail/reshape.hpp
index 205761d6888..ccffcbc61df 100644
--- a/cpp/include/cudf/detail/reshape.hpp
+++ b/cpp/include/cudf/detail/reshape.hpp
@@ -33,7 +33,7 @@ namespace detail {
 std::unique_ptr<table> tile(
   table_view const& input,
   size_type count,
-  rmm::cuda_stream_view               = cudf::get_default_stream(),
+  rmm::cuda_stream_view,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -43,7 +43,7 @@ std::unique_ptr<table> tile(
  */
 std::unique_ptr<column> interleave_columns(
   table_view const& input,
-  rmm::cuda_stream_view               = cudf::get_default_stream(),
+  rmm::cuda_stream_view,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/rolling.hpp b/cpp/include/cudf/detail/rolling.hpp
index 40bedf4046d..dcaece2bafc 100644
--- a/cpp/include/cudf/detail/rolling.hpp
+++ b/cpp/include/cudf/detail/rolling.hpp
@@ -45,7 +45,7 @@ std::unique_ptr<column> rolling_window(
   column_view const& following_window,
   size_type min_periods,
   rolling_aggregation const& agg,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/round.hpp b/cpp/include/cudf/detail/round.hpp
index 89c9ce6d0e7..1e5612919f4 100644
--- a/cpp/include/cudf/detail/round.hpp
+++ b/cpp/include/cudf/detail/round.hpp
@@ -35,7 +35,7 @@ std::unique_ptr<column> round(
   column_view const& input,
   int32_t decimal_places,
   rounding_method method,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh
index af4854965ee..88babe2f397 100644
--- a/cpp/include/cudf/detail/scatter.cuh
+++ b/cpp/include/cudf/detail/scatter.cuh
@@ -390,7 +390,7 @@ std::unique_ptr<table> scatter(
   MapIterator scatter_map_begin,
   MapIterator scatter_map_end,
   table_view const& target,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   CUDF_FUNC_RANGE();
diff --git a/cpp/include/cudf/detail/scatter.hpp b/cpp/include/cudf/detail/scatter.hpp
index 515df255f4a..7c4b04537ea 100644
--- a/cpp/include/cudf/detail/scatter.hpp
+++ b/cpp/include/cudf/detail/scatter.hpp
@@ -63,7 +63,7 @@ std::unique_ptr<table> scatter(
   table_view const& source,
   column_view const& scatter_map,
   table_view const& target,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -76,7 +76,7 @@ std::unique_ptr<table> scatter(
   table_view const& source,
   device_span<size_type const> const scatter_map,
   table_view const& target,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -112,7 +112,7 @@ std::unique_ptr<table> scatter(
   std::vector<std::reference_wrapper<const scalar>> const& source,
   column_view const& indices,
   table_view const& target,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -127,7 +127,7 @@ std::unique_ptr<table> boolean_mask_scatter(
   table_view const& source,
   table_view const& target,
   column_view const& boolean_mask,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -143,7 +143,7 @@ std::unique_ptr<table> boolean_mask_scatter(
   std::vector<std::reference_wrapper<const scalar>> const& source,
   table_view const& target,
   column_view const& boolean_mask,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/sequence.hpp b/cpp/include/cudf/detail/sequence.hpp
index a4bebb1886c..4a9bf5c74e1 100644
--- a/cpp/include/cudf/detail/sequence.hpp
+++ b/cpp/include/cudf/detail/sequence.hpp
@@ -36,7 +36,7 @@ std::unique_ptr<column> sequence(
   size_type size,
   scalar const& init,
   scalar const& step,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -49,7 +49,7 @@ std::unique_ptr<column> sequence(
 std::unique_ptr<column> sequence(
   size_type size,
   scalar const& init,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -64,7 +64,7 @@ std::unique_ptr<cudf::column> calendrical_month_sequence(
   size_type size,
   scalar const& init,
   size_type months,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/stream_compaction.hpp b/cpp/include/cudf/detail/stream_compaction.hpp
index 1651e8b33b6..e725718ed22 100644
--- a/cpp/include/cudf/detail/stream_compaction.hpp
+++ b/cpp/include/cudf/detail/stream_compaction.hpp
@@ -36,7 +36,7 @@ std::unique_ptr<table> drop_nulls(
   table_view const& input,
   std::vector<size_type> const& keys,
   cudf::size_type keep_threshold,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -49,7 +49,7 @@ std::unique_ptr<table> drop_nans(
   table_view const& input,
   std::vector<size_type> const& keys,
   cudf::size_type keep_threshold,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -60,7 +60,7 @@ std::unique_ptr<table> drop_nans(
 std::unique_ptr<table> apply_boolean_mask(
   table_view const& input,
   column_view const& boolean_mask,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -149,7 +149,7 @@ rmm::device_uvector<size_type> get_distinct_indices(
 cudf::size_type unique_count(column_view const& input,
                              null_policy null_handling,
                              nan_policy nan_handling,
-                             rmm::cuda_stream_view stream = cudf::get_default_stream());
+                             rmm::cuda_stream_view stream);
 
 /**
  * @copydoc cudf::unique_count(table_view const&, null_equality)
@@ -168,7 +168,7 @@ cudf::size_type unique_count(table_view const& input,
 cudf::size_type distinct_count(column_view const& input,
                                null_policy null_handling,
                                nan_policy nan_handling,
-                               rmm::cuda_stream_view stream = cudf::get_default_stream());
+                               rmm::cuda_stream_view stream);
 
 /**
  * @copydoc cudf::distinct_count(table_view const&, null_equality)
diff --git a/cpp/include/cudf/detail/structs/utilities.hpp b/cpp/include/cudf/detail/structs/utilities.hpp
index 03e752c102d..115c8ccd90e 100644
--- a/cpp/include/cudf/detail/structs/utilities.hpp
+++ b/cpp/include/cudf/detail/structs/utilities.hpp
@@ -189,7 +189,7 @@ void superimpose_parent_nulls(bitmask_type const* parent_null_mask,
  */
 std::tuple<cudf::column_view, std::vector<rmm::device_buffer>> superimpose_parent_nulls(
   column_view const& parent,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -215,7 +215,7 @@ std::tuple<cudf::column_view, std::vector<rmm::device_buffer>> superimpose_paren
  */
 std::tuple<cudf::table_view, std::vector<rmm::device_buffer>> superimpose_parent_nulls(
   table_view const& table,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
diff --git a/cpp/include/cudf/detail/tdigest/tdigest.hpp b/cpp/include/cudf/detail/tdigest/tdigest.hpp
index f1b795e21a9..77f9978ff1b 100644
--- a/cpp/include/cudf/detail/tdigest/tdigest.hpp
+++ b/cpp/include/cudf/detail/tdigest/tdigest.hpp
@@ -139,7 +139,7 @@ std::unique_ptr<column> make_tdigest_column(
   std::unique_ptr<column>&& tdigest_offsets,
   std::unique_ptr<column>&& min_values,
   std::unique_ptr<column>&& max_values,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -153,7 +153,7 @@ std::unique_ptr<column> make_tdigest_column(
  * @returns An empty tdigest column.
  */
 std::unique_ptr<column> make_empty_tdigest_column(
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -167,7 +167,7 @@ std::unique_ptr<column> make_empty_tdigest_column(
  * @returns An empty tdigest scalar.
  */
 std::unique_ptr<scalar> make_empty_tdigest_scalar(
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
diff --git a/cpp/include/cudf/detail/transform.hpp b/cpp/include/cudf/detail/transform.hpp
index 26cdf917cda..8e19ebb8da7 100644
--- a/cpp/include/cudf/detail/transform.hpp
+++ b/cpp/include/cudf/detail/transform.hpp
@@ -34,7 +34,7 @@ std::unique_ptr<column> transform(
   std::string const& unary_udf,
   data_type output_type,
   bool is_ptx,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -45,7 +45,7 @@ std::unique_ptr<column> transform(
 std::unique_ptr<column> compute_column(
   table_view const table,
   ast::operation const& expr,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -55,7 +55,7 @@ std::unique_ptr<column> compute_column(
  */
 std::pair<std::unique_ptr<rmm::device_buffer>, size_type> nans_to_nulls(
   column_view const& input,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -65,7 +65,7 @@ std::pair<std::unique_ptr<rmm::device_buffer>, size_type> nans_to_nulls(
  */
 std::pair<std::unique_ptr<rmm::device_buffer>, cudf::size_type> bools_to_mask(
   column_view const& input,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -75,7 +75,7 @@ std::pair<std::unique_ptr<rmm::device_buffer>, cudf::size_type> bools_to_mask(
  */
 std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::column>> encode(
   cudf::table_view const& input,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -86,7 +86,7 @@ std::pair<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::column>> encode(
 std::pair<std::unique_ptr<column>, table_view> one_hot_encode(
   column_view const& input,
   column_view const& categories,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -98,7 +98,7 @@ std::unique_ptr<column> mask_to_bools(
   bitmask_type const* null_mask,
   size_type begin_bit,
   size_type end_bit,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -108,7 +108,7 @@ std::unique_ptr<column> mask_to_bools(
  */
 std::unique_ptr<column> row_bit_count(
   table_view const& t,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/transpose.hpp b/cpp/include/cudf/detail/transpose.hpp
index 14f80a99de9..0470d625edc 100644
--- a/cpp/include/cudf/detail/transpose.hpp
+++ b/cpp/include/cudf/detail/transpose.hpp
@@ -30,7 +30,7 @@ namespace detail {
  */
 std::pair<std::unique_ptr<column>, table_view> transpose(
   table_view const& input,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/unary.hpp b/cpp/include/cudf/detail/unary.hpp
index c92b4f7683b..0e1c047d9b0 100644
--- a/cpp/include/cudf/detail/unary.hpp
+++ b/cpp/include/cudf/detail/unary.hpp
@@ -50,7 +50,7 @@ std::unique_ptr<column> true_if(
   InputIterator end,
   size_type size,
   Predicate p,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   auto output =
@@ -71,7 +71,7 @@ std::unique_ptr<column> true_if(
 std::unique_ptr<cudf::column> unary_operation(
   cudf::column_view const& input,
   cudf::unary_operator op,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -82,7 +82,7 @@ std::unique_ptr<cudf::column> unary_operation(
 std::unique_ptr<column> cast(
   column_view const& input,
   data_type type,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -92,7 +92,7 @@ std::unique_ptr<column> cast(
  */
 std::unique_ptr<column> is_nan(
   cudf::column_view const& input,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -102,7 +102,7 @@ std::unique_ptr<column> is_nan(
  */
 std::unique_ptr<column> is_not_nan(
   cudf::column_view const& input,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/detail/utilities/cuda.cuh b/cpp/include/cudf/detail/utilities/cuda.cuh
index 02564ea1343..cdbc26701d1 100644
--- a/cpp/include/cudf/detail/utilities/cuda.cuh
+++ b/cpp/include/cudf/detail/utilities/cuda.cuh
@@ -170,8 +170,7 @@ __global__ void single_thread_kernel(F f)
  * @param stream CUDA stream used for the kernel launch
  */
 template <class Functor>
-void device_single_thread(Functor functor,
-                          rmm::cuda_stream_view stream = cudf::get_default_stream())
+void device_single_thread(Functor functor, rmm::cuda_stream_view stream)
 {
   single_thread_kernel<<<1, 1, 0, stream.value()>>>(functor);
 }
diff --git a/cpp/include/cudf/detail/utilities/vector_factories.hpp b/cpp/include/cudf/detail/utilities/vector_factories.hpp
index d59ecea8bb0..75e5222ab97 100644
--- a/cpp/include/cudf/detail/utilities/vector_factories.hpp
+++ b/cpp/include/cudf/detail/utilities/vector_factories.hpp
@@ -72,7 +72,7 @@ rmm::device_uvector<T> make_zeroed_device_uvector_async(
 template <typename T>
 rmm::device_uvector<T> make_zeroed_device_uvector_sync(
   std::size_t size,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   rmm::device_uvector<T> ret(size, stream, mr);
@@ -148,7 +148,7 @@ rmm::device_uvector<typename Container::value_type> make_device_uvector_async(
 template <typename T>
 rmm::device_uvector<T> make_device_uvector_async(
   device_span<T const> source_data,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   rmm::device_uvector<T> ret(source_data.size(), stream, mr);
@@ -201,7 +201,7 @@ rmm::device_uvector<typename Container::value_type> make_device_uvector_async(
 template <typename T>
 rmm::device_uvector<T> make_device_uvector_sync(
   host_span<T const> source_data,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   auto ret = make_device_uvector_async(source_data, stream, mr);
@@ -228,7 +228,7 @@ template <
     std::is_convertible_v<Container, host_span<typename Container::value_type const>>>* = nullptr>
 rmm::device_uvector<typename Container::value_type> make_device_uvector_sync(
   Container const& c,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   return make_device_uvector_sync(host_span<typename Container::value_type const>{c}, stream, mr);
@@ -249,7 +249,7 @@ rmm::device_uvector<typename Container::value_type> make_device_uvector_sync(
 template <typename T>
 rmm::device_uvector<T> make_device_uvector_sync(
   device_span<T const> source_data,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   auto ret = make_device_uvector_async(source_data, stream, mr);
@@ -276,7 +276,7 @@ template <
     std::is_convertible_v<Container, device_span<typename Container::value_type const>>>* = nullptr>
 rmm::device_uvector<typename Container::value_type> make_device_uvector_sync(
   Container const& c,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   return make_device_uvector_sync(device_span<typename Container::value_type const>{c}, stream, mr);
@@ -366,8 +366,8 @@ template <
   typename Container,
   std::enable_if_t<
     std::is_convertible_v<Container, device_span<typename Container::value_type const>>>* = nullptr>
-std::vector<typename Container::value_type> make_std_vector_sync(
-  Container const& c, rmm::cuda_stream_view stream = cudf::get_default_stream())
+std::vector<typename Container::value_type> make_std_vector_sync(Container const& c,
+                                                                 rmm::cuda_stream_view stream)
 {
   return make_std_vector_sync(device_span<typename Container::value_type const>{c}, stream);
 }
@@ -423,8 +423,7 @@ thrust::host_vector<typename Container::value_type> make_host_vector_async(
  * @return The data copied to the host
  */
 template <typename T>
-thrust::host_vector<T> make_host_vector_sync(
-  device_span<T const> v, rmm::cuda_stream_view stream = cudf::get_default_stream())
+thrust::host_vector<T> make_host_vector_sync(device_span<T const> v, rmm::cuda_stream_view stream)
 {
   auto result = make_host_vector_async(v, stream);
   stream.synchronize();
@@ -448,7 +447,7 @@ template <
   std::enable_if_t<
     std::is_convertible_v<Container, device_span<typename Container::value_type const>>>* = nullptr>
 thrust::host_vector<typename Container::value_type> make_host_vector_sync(
-  Container const& c, rmm::cuda_stream_view stream = cudf::get_default_stream())
+  Container const& c, rmm::cuda_stream_view stream)
 {
   return make_host_vector_sync(device_span<typename Container::value_type const>{c}, stream);
 }
diff --git a/cpp/include/cudf/detail/valid_if.cuh b/cpp/include/cudf/detail/valid_if.cuh
index 56cc73e63e2..04c78bed17d 100644
--- a/cpp/include/cudf/detail/valid_if.cuh
+++ b/cpp/include/cudf/detail/valid_if.cuh
@@ -90,7 +90,7 @@ std::pair<rmm::device_buffer, size_type> valid_if(
   InputIterator begin,
   InputIterator end,
   Predicate p,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   CUDF_EXPECTS(begin <= end, "Invalid range.");
diff --git a/cpp/include/cudf/dictionary/detail/concatenate.hpp b/cpp/include/cudf/dictionary/detail/concatenate.hpp
index e893e9d6499..716caa3e304 100644
--- a/cpp/include/cudf/dictionary/detail/concatenate.hpp
+++ b/cpp/include/cudf/dictionary/detail/concatenate.hpp
@@ -39,7 +39,7 @@ namespace detail {
  */
 std::unique_ptr<column> concatenate(
   host_span<column_view const> columns,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/dictionary/detail/encode.hpp b/cpp/include/cudf/dictionary/detail/encode.hpp
index 454b8400f87..a16d518dd0d 100644
--- a/cpp/include/cudf/dictionary/detail/encode.hpp
+++ b/cpp/include/cudf/dictionary/detail/encode.hpp
@@ -74,7 +74,7 @@ std::unique_ptr<column> encode(
  */
 std::unique_ptr<column> decode(
   dictionary_column_view const& dictionary_column,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
diff --git a/cpp/include/cudf/dictionary/detail/replace.hpp b/cpp/include/cudf/dictionary/detail/replace.hpp
index a13a5eee6cb..85e2d9a3a85 100644
--- a/cpp/include/cudf/dictionary/detail/replace.hpp
+++ b/cpp/include/cudf/dictionary/detail/replace.hpp
@@ -42,7 +42,7 @@ namespace detail {
 std::unique_ptr<column> replace_nulls(
   dictionary_column_view const& input,
   dictionary_column_view const& replacement,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -60,7 +60,7 @@ std::unique_ptr<column> replace_nulls(
 std::unique_ptr<column> replace_nulls(
   dictionary_column_view const& input,
   scalar const& replacement,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/dictionary/detail/search.hpp b/cpp/include/cudf/dictionary/detail/search.hpp
index 9cf45eafc7d..2d65b561cd3 100644
--- a/cpp/include/cudf/dictionary/detail/search.hpp
+++ b/cpp/include/cudf/dictionary/detail/search.hpp
@@ -34,7 +34,7 @@ namespace detail {
 std::unique_ptr<scalar> get_index(
   dictionary_column_view const& dictionary,
   scalar const& key,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -59,7 +59,7 @@ std::unique_ptr<scalar> get_index(
 std::unique_ptr<scalar> get_insert_index(
   dictionary_column_view const& dictionary,
   scalar const& key,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/dictionary/detail/update_keys.hpp b/cpp/include/cudf/dictionary/detail/update_keys.hpp
index 23681d36ee1..7f78effdd05 100644
--- a/cpp/include/cudf/dictionary/detail/update_keys.hpp
+++ b/cpp/include/cudf/dictionary/detail/update_keys.hpp
@@ -35,7 +35,7 @@ namespace detail {
 std::unique_ptr<column> add_keys(
   dictionary_column_view const& dictionary_column,
   column_view const& new_keys,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -47,7 +47,7 @@ std::unique_ptr<column> add_keys(
 std::unique_ptr<column> remove_keys(
   dictionary_column_view const& dictionary_column,
   column_view const& keys_to_remove,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -58,7 +58,7 @@ std::unique_ptr<column> remove_keys(
  */
 std::unique_ptr<column> remove_unused_keys(
   dictionary_column_view const& dictionary_column,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -70,7 +70,7 @@ std::unique_ptr<column> remove_unused_keys(
 std::unique_ptr<column> set_keys(
   dictionary_column_view const& dictionary_column,
   column_view const& keys,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -81,7 +81,7 @@ std::unique_ptr<column> set_keys(
  */
 std::vector<std::unique_ptr<column>> match_dictionaries(
   cudf::host_span<dictionary_column_view const> input,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -105,7 +105,7 @@ std::vector<std::unique_ptr<column>> match_dictionaries(
  */
 std::pair<std::vector<std::unique_ptr<column>>, std::vector<table_view>> match_dictionaries(
   std::vector<table_view> tables,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/io/detail/avro.hpp b/cpp/include/cudf/io/detail/avro.hpp
index 00665873b67..c141e25f939 100644
--- a/cpp/include/cudf/io/detail/avro.hpp
+++ b/cpp/include/cudf/io/detail/avro.hpp
@@ -39,7 +39,7 @@ namespace avro {
 table_with_metadata read_avro(
   std::unique_ptr<cudf::io::datasource>&& source,
   avro_reader_options const& options,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace avro
diff --git a/cpp/include/cudf/io/detail/csv.hpp b/cpp/include/cudf/io/detail/csv.hpp
index 920b815ce12..59de2ea2f12 100644
--- a/cpp/include/cudf/io/detail/csv.hpp
+++ b/cpp/include/cudf/io/detail/csv.hpp
@@ -55,7 +55,7 @@ void write_csv(data_sink* sink,
                table_view const& table,
                const table_metadata* metadata,
                csv_writer_options const& options,
-               rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+               rmm::cuda_stream_view stream,
                rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace csv
diff --git a/cpp/include/cudf/io/detail/json.hpp b/cpp/include/cudf/io/detail/json.hpp
index 6d0d23c3c78..42717fe36df 100644
--- a/cpp/include/cudf/io/detail/json.hpp
+++ b/cpp/include/cudf/io/detail/json.hpp
@@ -39,7 +39,7 @@ namespace json {
 table_with_metadata read_json(
   std::vector<std::unique_ptr<cudf::io::datasource>>& sources,
   json_reader_options const& options,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace json
diff --git a/cpp/include/cudf/io/detail/orc.hpp b/cpp/include/cudf/io/detail/orc.hpp
index 10bdf6e3e71..4c78502a21b 100644
--- a/cpp/include/cudf/io/detail/orc.hpp
+++ b/cpp/include/cudf/io/detail/orc.hpp
@@ -74,8 +74,7 @@ class reader {
    *
    * @return The set of columns along with table metadata
    */
-  table_with_metadata read(orc_reader_options const& options,
-                           rmm::cuda_stream_view stream = cudf::get_default_stream());
+  table_with_metadata read(orc_reader_options const& options, rmm::cuda_stream_view stream);
 };
 
 /**
diff --git a/cpp/include/cudf/lists/detail/concatenate.hpp b/cpp/include/cudf/lists/detail/concatenate.hpp
index f2982a67389..5a8b4bc3bf3 100644
--- a/cpp/include/cudf/lists/detail/concatenate.hpp
+++ b/cpp/include/cudf/lists/detail/concatenate.hpp
@@ -45,7 +45,7 @@ namespace detail {
  */
 std::unique_ptr<column> concatenate(
   host_span<column_view const> columns,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/lists/detail/gather.cuh b/cpp/include/cudf/lists/detail/gather.cuh
index f53e8ca8033..7db908c5b52 100644
--- a/cpp/include/cudf/lists/detail/gather.cuh
+++ b/cpp/include/cudf/lists/detail/gather.cuh
@@ -320,7 +320,8 @@ std::unique_ptr<column> gather_list_leaf(
 std::unique_ptr<column> segmented_gather(
   lists_column_view const& source_column,
   lists_column_view const& gather_map_list,
-  out_of_bounds_policy bounds_policy  = out_of_bounds_policy::DONT_CHECK,
+  out_of_bounds_policy bounds_policy = out_of_bounds_policy::DONT_CHECK,
+  // Move before bounds_policy?
   rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
diff --git a/cpp/include/cudf/lists/detail/scatter.cuh b/cpp/include/cudf/lists/detail/scatter.cuh
index c343eea1014..5d89a9be29c 100644
--- a/cpp/include/cudf/lists/detail/scatter.cuh
+++ b/cpp/include/cudf/lists/detail/scatter.cuh
@@ -96,7 +96,7 @@ std::unique_ptr<column> scatter_impl(
   MapIterator scatter_map_end,
   column_view const& source,
   column_view const& target,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   CUDF_EXPECTS(column_types_equal(source, target), "Mismatched column types.");
@@ -169,7 +169,7 @@ std::unique_ptr<column> scatter(
   MapIterator scatter_map_begin,
   MapIterator scatter_map_end,
   column_view const& target,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   auto const num_rows = target.size();
@@ -226,7 +226,7 @@ std::unique_ptr<column> scatter(
   MapIterator scatter_map_begin,
   MapIterator scatter_map_end,
   column_view const& target,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   auto const num_rows = target.size();
diff --git a/cpp/include/cudf/lists/lists_column_factories.hpp b/cpp/include/cudf/lists/lists_column_factories.hpp
index e02fa3fde5f..a6eacb97e91 100644
--- a/cpp/include/cudf/lists/lists_column_factories.hpp
+++ b/cpp/include/cudf/lists/lists_column_factories.hpp
@@ -38,7 +38,7 @@ namespace detail {
 std::unique_ptr<cudf::column> make_lists_column_from_scalar(
   list_scalar const& value,
   size_type size,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/strings/detail/combine.hpp b/cpp/include/cudf/strings/detail/combine.hpp
index 3de97ed69f1..ade28faf645 100644
--- a/cpp/include/cudf/strings/detail/combine.hpp
+++ b/cpp/include/cudf/strings/detail/combine.hpp
@@ -38,7 +38,8 @@ std::unique_ptr<column> concatenate(
   table_view const& strings_columns,
   string_scalar const& separator,
   string_scalar const& narep,
-  separator_on_nulls separate_nulls   = separator_on_nulls::YES,
+  separator_on_nulls separate_nulls = separator_on_nulls::YES,
+  // Move before separate_nulls?
   rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
@@ -52,7 +53,7 @@ std::unique_ptr<column> join_strings(
   strings_column_view const& strings,
   string_scalar const& separator,
   string_scalar const& narep,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
diff --git a/cpp/include/cudf/strings/detail/concatenate.hpp b/cpp/include/cudf/strings/detail/concatenate.hpp
index 76397c15dad..caaeb2afbe7 100644
--- a/cpp/include/cudf/strings/detail/concatenate.hpp
+++ b/cpp/include/cudf/strings/detail/concatenate.hpp
@@ -44,7 +44,7 @@ namespace detail {
  */
 std::unique_ptr<column> concatenate(
   host_span<column_view const> columns,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/strings/detail/copying.hpp b/cpp/include/cudf/strings/detail/copying.hpp
index e4ae9917f58..c70952b0962 100644
--- a/cpp/include/cudf/strings/detail/copying.hpp
+++ b/cpp/include/cudf/strings/detail/copying.hpp
@@ -52,7 +52,8 @@ namespace detail {
 std::unique_ptr<cudf::column> copy_slice(
   strings_column_view const& strings,
   size_type start,
-  size_type end                       = -1,
+  size_type end = -1,
+  // Move before end?
   rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
diff --git a/cpp/include/cudf/strings/detail/fill.hpp b/cpp/include/cudf/strings/detail/fill.hpp
index e6a2fa8ba4e..1ad9663a614 100644
--- a/cpp/include/cudf/strings/detail/fill.hpp
+++ b/cpp/include/cudf/strings/detail/fill.hpp
@@ -47,7 +47,7 @@ std::unique_ptr<column> fill(
   size_type begin,
   size_type end,
   string_scalar const& value,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/strings/detail/json.hpp b/cpp/include/cudf/strings/detail/json.hpp
index 87a1040b67d..8ea579ae5c0 100644
--- a/cpp/include/cudf/strings/detail/json.hpp
+++ b/cpp/include/cudf/strings/detail/json.hpp
@@ -34,7 +34,7 @@ std::unique_ptr<cudf::column> get_json_object(
   cudf::strings_column_view const& col,
   cudf::string_scalar const& json_path,
   get_json_object_options options,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf/strings/detail/replace.hpp b/cpp/include/cudf/strings/detail/replace.hpp
index 814188d88c9..a9a6ef00103 100644
--- a/cpp/include/cudf/strings/detail/replace.hpp
+++ b/cpp/include/cudf/strings/detail/replace.hpp
@@ -47,7 +47,8 @@ std::unique_ptr<column> replace(
   strings_column_view const& strings,
   string_scalar const& target,
   string_scalar const& repl,
-  int32_t maxrepl                     = -1,
+  int32_t maxrepl = -1,
+  // Move before maxrepl?
   rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
@@ -59,9 +60,10 @@ std::unique_ptr<column> replace(
  */
 std::unique_ptr<column> replace_slice(
   strings_column_view const& strings,
-  string_scalar const& repl           = string_scalar(""),
-  size_type start                     = 0,
-  size_type stop                      = -1,
+  string_scalar const& repl = string_scalar(""),
+  size_type start           = 0,
+  size_type stop            = -1,
+  // Move before repl?
   rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
@@ -75,7 +77,7 @@ std::unique_ptr<column> replace(
   strings_column_view const& strings,
   strings_column_view const& targets,
   strings_column_view const& repls,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -98,7 +100,8 @@ std::unique_ptr<column> replace(
  */
 std::unique_ptr<column> replace_nulls(
   strings_column_view const& strings,
-  string_scalar const& repl           = string_scalar(""),
+  string_scalar const& repl = string_scalar(""),
+  // Move before repl?
   rmm::cuda_stream_view stream        = cudf::get_default_stream(),
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
diff --git a/cpp/include/cudf/strings/detail/scatter.cuh b/cpp/include/cudf/strings/detail/scatter.cuh
index 10641677ea2..c8a90ea538a 100644
--- a/cpp/include/cudf/strings/detail/scatter.cuh
+++ b/cpp/include/cudf/strings/detail/scatter.cuh
@@ -62,7 +62,7 @@ std::unique_ptr<column> scatter(
   SourceIterator end,
   MapIterator scatter_map,
   strings_column_view const& target,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   if (target.is_empty()) return make_empty_column(type_id::STRING);
diff --git a/cpp/include/cudf/strings/detail/utilities.cuh b/cpp/include/cudf/strings/detail/utilities.cuh
index 4eca9a5a55e..9404ac14775 100644
--- a/cpp/include/cudf/strings/detail/utilities.cuh
+++ b/cpp/include/cudf/strings/detail/utilities.cuh
@@ -53,7 +53,7 @@ template <typename InputIterator>
 std::unique_ptr<column> make_offsets_child_column(
   InputIterator begin,
   InputIterator end,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   CUDF_EXPECTS(begin < end, "Invalid iterator range");
@@ -121,7 +121,7 @@ auto make_strings_children(
   SizeAndExecuteFunction size_and_exec_fn,
   size_type exec_size,
   size_type strings_count,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   auto offsets_column = make_numeric_column(
@@ -178,7 +178,7 @@ template <typename SizeAndExecuteFunction>
 auto make_strings_children(
   SizeAndExecuteFunction size_and_exec_fn,
   size_type strings_count,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   return make_strings_children(size_and_exec_fn, strings_count, strings_count, stream, mr);
diff --git a/cpp/include/cudf/strings/detail/utilities.hpp b/cpp/include/cudf/strings/detail/utilities.hpp
index f87932b4608..829e0207110 100644
--- a/cpp/include/cudf/strings/detail/utilities.hpp
+++ b/cpp/include/cudf/strings/detail/utilities.hpp
@@ -38,7 +38,7 @@ namespace detail {
  */
 std::unique_ptr<column> create_chars_child_column(
   size_type bytes,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -51,7 +51,7 @@ std::unique_ptr<column> create_chars_child_column(
  */
 rmm::device_uvector<string_view> create_string_vector_from_column(
   cudf::strings_column_view const strings,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/include/cudf_test/column_wrapper.hpp b/cpp/include/cudf_test/column_wrapper.hpp
index f9f571c252a..91773b2c3f1 100644
--- a/cpp/include/cudf_test/column_wrapper.hpp
+++ b/cpp/include/cudf_test/column_wrapper.hpp
@@ -732,9 +732,9 @@ class strings_column_wrapper : public detail::column_wrapper {
   {
     auto all_valid        = thrust::make_constant_iterator(true);
     auto [chars, offsets] = detail::make_chars_and_offsets(begin, end, all_valid);
-    auto d_chars          = cudf::detail::make_device_uvector_sync(chars);
-    auto d_offsets        = cudf::detail::make_device_uvector_sync(offsets);
-    wrapped               = cudf::make_strings_column(d_chars, d_offsets);
+    auto d_chars   = cudf::detail::make_device_uvector_sync(chars, cudf::get_default_stream());
+    auto d_offsets = cudf::detail::make_device_uvector_sync(offsets, cudf::get_default_stream());
+    wrapped        = cudf::make_strings_column(d_chars, d_offsets);
   }
 
   /**
@@ -772,10 +772,10 @@ class strings_column_wrapper : public detail::column_wrapper {
     size_type num_strings = std::distance(begin, end);
     auto [chars, offsets] = detail::make_chars_and_offsets(begin, end, v);
     auto null_mask        = detail::make_null_mask_vector(v, v + num_strings);
-    auto d_chars          = cudf::detail::make_device_uvector_sync(chars);
-    auto d_offsets        = cudf::detail::make_device_uvector_sync(offsets);
-    auto d_bitmask        = cudf::detail::make_device_uvector_sync(null_mask);
-    wrapped               = cudf::make_strings_column(d_chars, d_offsets, d_bitmask);
+    auto d_chars   = cudf::detail::make_device_uvector_sync(chars, cudf::get_default_stream());
+    auto d_offsets = cudf::detail::make_device_uvector_sync(offsets, cudf::get_default_stream());
+    auto d_bitmask = cudf::detail::make_device_uvector_sync(null_mask, cudf::get_default_stream());
+    wrapped        = cudf::make_strings_column(d_chars, d_offsets, d_bitmask);
   }
 
   /**
diff --git a/cpp/include/cudf_test/tdigest_utilities.cuh b/cpp/include/cudf_test/tdigest_utilities.cuh
index 6e1982164e5..250f8ea8580 100644
--- a/cpp/include/cudf_test/tdigest_utilities.cuh
+++ b/cpp/include/cudf_test/tdigest_utilities.cuh
@@ -217,7 +217,7 @@ void tdigest_simple_all_nulls_aggregation(Func op)
     static_cast<column_view>(values).type(), tdigest_gen{}, op, values, delta);
 
   // NOTE: an empty tdigest column still has 1 row.
-  auto expected = cudf::detail::tdigest::make_empty_tdigest_column();
+  auto expected = cudf::detail::tdigest::make_empty_tdigest_column(cudf::get_default_stream());
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, *expected);
 }
@@ -508,9 +508,9 @@ template <typename MergeFunc>
 void tdigest_merge_empty(MergeFunc merge_op)
 {
   // 3 empty tdigests all in the same group
-  auto a = cudf::detail::tdigest::make_empty_tdigest_column();
-  auto b = cudf::detail::tdigest::make_empty_tdigest_column();
-  auto c = cudf::detail::tdigest::make_empty_tdigest_column();
+  auto a = cudf::detail::tdigest::make_empty_tdigest_column(cudf::get_default_stream());
+  auto b = cudf::detail::tdigest::make_empty_tdigest_column(cudf::get_default_stream());
+  auto c = cudf::detail::tdigest::make_empty_tdigest_column(cudf::get_default_stream());
   std::vector<column_view> cols;
   cols.push_back(*a);
   cols.push_back(*b);
@@ -520,7 +520,7 @@ void tdigest_merge_empty(MergeFunc merge_op)
   auto const delta = 1000;
   auto result      = merge_op(*values, delta);
 
-  auto expected = cudf::detail::tdigest::make_empty_tdigest_column();
+  auto expected = cudf::detail::tdigest::make_empty_tdigest_column(cudf::get_default_stream());
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected, *result);
 }
diff --git a/cpp/include/nvtext/detail/tokenize.hpp b/cpp/include/nvtext/detail/tokenize.hpp
index c06e6211654..9c1cdbd6310 100644
--- a/cpp/include/nvtext/detail/tokenize.hpp
+++ b/cpp/include/nvtext/detail/tokenize.hpp
@@ -38,8 +38,9 @@ namespace detail {
 std::unique_ptr<cudf::column> tokenize(
   cudf::strings_column_view const& strings,
   cudf::string_scalar const& delimiter = cudf::string_scalar{""},
-  rmm::cuda_stream_view stream         = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr  = rmm::mr::get_current_device_resource());
+  // Move before delimiter?
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
  * @copydoc nvtext::tokenize(strings_column_view const&,strings_column_view
@@ -54,7 +55,7 @@ std::unique_ptr<cudf::column> tokenize(
 std::unique_ptr<cudf::column> tokenize(
   cudf::strings_column_view const& strings,
   cudf::strings_column_view const& delimiters,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
@@ -71,8 +72,9 @@ std::unique_ptr<cudf::column> tokenize(
 std::unique_ptr<cudf::column> count_tokens(
   cudf::strings_column_view const& strings,
   cudf::string_scalar const& delimiter = cudf::string_scalar{""},
-  rmm::cuda_stream_view stream         = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr  = rmm::mr::get_current_device_resource());
+  // Move before delimiter?
+  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /**
  * @copydoc nvtext::count_tokens(strings_column_view const&,strings_column_view
@@ -87,7 +89,7 @@ std::unique_ptr<cudf::column> count_tokens(
 std::unique_ptr<cudf::column> count_tokens(
   cudf::strings_column_view const& strings,
   cudf::strings_column_view const& delimiters,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/src/bitmask/null_mask.cu b/cpp/src/bitmask/null_mask.cu
index 6fef15d58d7..958bf21e6df 100644
--- a/cpp/src/bitmask/null_mask.cu
+++ b/cpp/src/bitmask/null_mask.cu
@@ -165,7 +165,7 @@ rmm::device_buffer create_null_mask(size_type size,
 // or null, otherwise;
 void set_null_mask(bitmask_type* bitmask, size_type begin_bit, size_type end_bit, bool valid)
 {
-  return detail::set_null_mask(bitmask, begin_bit, end_bit, valid);
+  return detail::set_null_mask(bitmask, begin_bit, end_bit, valid, cudf::get_default_stream());
 }
 
 namespace detail {
diff --git a/cpp/src/copying/purge_nonempty_nulls.cu b/cpp/src/copying/purge_nonempty_nulls.cu
index 35eb13119f7..ab3cfefd518 100644
--- a/cpp/src/copying/purge_nonempty_nulls.cu
+++ b/cpp/src/copying/purge_nonempty_nulls.cu
@@ -104,7 +104,10 @@ bool may_have_nonempty_nulls(column_view const& input)
 /**
  * @copydoc cudf::has_nonempty_nulls
  */
-bool has_nonempty_nulls(column_view const& input) { return detail::has_nonempty_nulls(input); }
+bool has_nonempty_nulls(column_view const& input)
+{
+  return detail::has_nonempty_nulls(input, cudf::get_default_stream());
+}
 
 /**
  * @copydoc cudf::purge_nonempty_nulls(lists_column_view const&, rmm::mr::device_memory_resource*)
diff --git a/cpp/src/dictionary/remove_keys.cu b/cpp/src/dictionary/remove_keys.cu
index 4f17fac3129..8a703959d9e 100644
--- a/cpp/src/dictionary/remove_keys.cu
+++ b/cpp/src/dictionary/remove_keys.cu
@@ -59,7 +59,7 @@ template <typename KeysKeeper>
 std::unique_ptr<column> remove_keys_fn(
   dictionary_column_view const& dictionary_column,
   KeysKeeper keys_to_keep_fn,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   auto const keys_view    = dictionary_column.keys();
@@ -151,7 +151,7 @@ std::unique_ptr<column> remove_keys_fn(
 std::unique_ptr<column> remove_keys(
   dictionary_column_view const& dictionary_column,
   column_view const& keys_to_remove,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   CUDF_EXPECTS(!keys_to_remove.has_nulls(), "keys_to_remove must not have nulls");
@@ -168,7 +168,7 @@ std::unique_ptr<column> remove_keys(
 
 std::unique_ptr<column> remove_unused_keys(
   dictionary_column_view const& dictionary_column,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   // locate the keys to remove
diff --git a/cpp/src/hash/concurrent_unordered_map.cuh b/cpp/src/hash/concurrent_unordered_map.cuh
index a268e2ef778..f99aabc56bf 100644
--- a/cpp/src/hash/concurrent_unordered_map.cuh
+++ b/cpp/src/hash/concurrent_unordered_map.cuh
@@ -159,7 +159,7 @@ class concurrent_unordered_map {
    * storage
    */
   static auto create(size_type capacity,
-                     rmm::cuda_stream_view stream     = cudf::get_default_stream(),
+                     rmm::cuda_stream_view stream,
                      const mapped_type unused_element = std::numeric_limits<mapped_type>::max(),
                      const key_type unused_key        = std::numeric_limits<key_type>::max(),
                      const Hasher& hash_function      = hasher(),
@@ -421,8 +421,7 @@ class concurrent_unordered_map {
     }
   }
 
-  void assign_async(const concurrent_unordered_map& other,
-                    rmm::cuda_stream_view stream = cudf::get_default_stream())
+  void assign_async(const concurrent_unordered_map& other, rmm::cuda_stream_view stream)
   {
     if (other.m_capacity <= m_capacity) {
       m_capacity = other.m_capacity;
@@ -440,7 +439,7 @@ class concurrent_unordered_map {
                                   stream.value()));
   }
 
-  void clear_async(rmm::cuda_stream_view stream = cudf::get_default_stream())
+  void clear_async(rmm::cuda_stream_view stream)
   {
     constexpr int block_size = 128;
     init_hashtbl<<<((m_capacity - 1) / block_size) + 1, block_size, 0, stream.value()>>>(
@@ -455,7 +454,7 @@ class concurrent_unordered_map {
     }
   }
 
-  void prefetch(const int dev_id, rmm::cuda_stream_view stream = cudf::get_default_stream())
+  void prefetch(const int dev_id, rmm::cuda_stream_view stream)
   {
     cudaPointerAttributes hashtbl_values_ptr_attributes;
     cudaError_t status = cudaPointerGetAttributes(&hashtbl_values_ptr_attributes, m_hashtbl_values);
@@ -475,7 +474,7 @@ class concurrent_unordered_map {
    *
    * @param stream CUDA stream used for device memory operations and kernel launches.
    */
-  void destroy(rmm::cuda_stream_view stream = cudf::get_default_stream())
+  void destroy(rmm::cuda_stream_view stream)
   {
     m_allocator.deallocate(m_hashtbl_values, m_capacity, stream);
     delete this;
@@ -516,7 +515,7 @@ class concurrent_unordered_map {
                            const Hasher& hash_function,
                            const Equality& equal,
                            const allocator_type& allocator,
-                           rmm::cuda_stream_view stream = cudf::get_default_stream())
+                           rmm::cuda_stream_view stream)
     : m_hf(hash_function),
       m_equal(equal),
       m_allocator(allocator),
diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp
index 968d3827bfe..f58a34a5b7b 100644
--- a/cpp/src/io/functions.cpp
+++ b/cpp/src/io/functions.cpp
@@ -349,7 +349,7 @@ table_with_metadata read_orc(orc_reader_options const& options, rmm::mr::device_
   auto reader      = std::make_unique<detail_orc::reader>(
     std::move(datasources), options, cudf::get_default_stream(), mr);
 
-  return reader->read(options);
+  return reader->read(options, cudf::get_default_stream());
 }
 
 /**
diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu
index 36329db3e88..fb7091018a6 100644
--- a/cpp/src/io/json/json_column.cu
+++ b/cpp/src/io/json/json_column.cu
@@ -73,7 +73,7 @@ auto print_vec = [](auto const& cpu, auto const name, auto converter) {
 
 void print_tree(host_span<SymbolT const> input,
                 tree_meta_t const& d_gpu_tree,
-                rmm::cuda_stream_view stream = cudf::get_default_stream())
+                rmm::cuda_stream_view stream)
 {
   print_vec(cudf::detail::make_std_vector_async(d_gpu_tree.node_categories, stream),
             "node_categories",
diff --git a/cpp/src/join/conditional_join.hpp b/cpp/src/join/conditional_join.hpp
index 23ecfebc52a..7c329cd8e17 100644
--- a/cpp/src/join/conditional_join.hpp
+++ b/cpp/src/join/conditional_join.hpp
@@ -68,7 +68,7 @@ std::size_t compute_conditional_join_output_size(
   table_view const& right,
   ast::expression const& binary_predicate,
   join_kind JoinKind,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 }  // namespace detail
diff --git a/cpp/src/lists/combine/concatenate_rows.cu b/cpp/src/lists/combine/concatenate_rows.cu
index 0a3ff333d6c..8b006548391 100644
--- a/cpp/src/lists/combine/concatenate_rows.cu
+++ b/cpp/src/lists/combine/concatenate_rows.cu
@@ -245,7 +245,8 @@ std::unique_ptr<column> concatenate_rows(table_view const& input,
            row_null_counts = row_null_counts.data()] __device__(size_t i) -> size_type {
             auto const row_index = i % num_rows;
             return row_null_counts[row_index] != num_columns;
-          });
+          },
+          stream);
       }
       // NULLIFY_OUTPUT_ROW.  Output row is nullfied if any input row is null
       return cudf::detail::valid_if(
@@ -255,7 +256,8 @@ std::unique_ptr<column> concatenate_rows(table_view const& input,
          row_null_counts = row_null_counts.data()] __device__(size_t i) -> size_type {
           auto const row_index = i % num_rows;
           return row_null_counts[row_index] == 0;
-        });
+        },
+        stream);
     }();
     concat->set_null_mask(std::move(null_mask), null_count);
   }
diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp
index a7d7e14a193..d7a195c088c 100644
--- a/cpp/src/reductions/reductions.cpp
+++ b/cpp/src/reductions/reductions.cpp
@@ -142,7 +142,7 @@ std::unique_ptr<scalar> reduce(
   reduce_aggregation const& agg,
   data_type output_dtype,
   std::optional<std::reference_wrapper<scalar const>> init,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   CUDF_EXPECTS(!init.has_value() || col.type() == init.value().get().type(),
@@ -157,7 +157,7 @@ std::unique_ptr<scalar> reduce(
   // handcraft the default scalar with input column.
   if (col.size() <= col.null_count()) {
     if (agg.kind == aggregation::TDIGEST || agg.kind == aggregation::MERGE_TDIGEST) {
-      return detail::tdigest::make_empty_tdigest_scalar();
+      return detail::tdigest::make_empty_tdigest_scalar(stream);
     }
     if (col.type().id() == type_id::EMPTY || col.type() != output_dtype) {
       // Under some circumstance, the output type will become the List of input type,
diff --git a/cpp/src/stream_compaction/distinct_count.cu b/cpp/src/stream_compaction/distinct_count.cu
index 96fcd8b53fc..09bd1ff7f5c 100644
--- a/cpp/src/stream_compaction/distinct_count.cu
+++ b/cpp/src/stream_compaction/distinct_count.cu
@@ -187,7 +187,7 @@ cudf::size_type distinct_count(column_view const& input,
                                nan_policy nan_handling)
 {
   CUDF_FUNC_RANGE();
-  return detail::distinct_count(input, null_handling, nan_handling);
+  return detail::distinct_count(input, null_handling, nan_handling, cudf::get_default_stream());
 }
 
 cudf::size_type distinct_count(table_view const& input, null_equality nulls_equal)
diff --git a/cpp/src/stream_compaction/unique_count.cu b/cpp/src/stream_compaction/unique_count.cu
index 8a793ef4729..8363ee8120b 100644
--- a/cpp/src/stream_compaction/unique_count.cu
+++ b/cpp/src/stream_compaction/unique_count.cu
@@ -127,7 +127,7 @@ cudf::size_type unique_count(column_view const& input,
                              nan_policy nan_handling)
 {
   CUDF_FUNC_RANGE();
-  return detail::unique_count(input, null_handling, nan_handling);
+  return detail::unique_count(input, null_handling, nan_handling, cudf::get_default_stream());
 }
 
 cudf::size_type unique_count(table_view const& input, null_equality nulls_equal)
diff --git a/cpp/src/strings/replace/replace_re.cu b/cpp/src/strings/replace/replace_re.cu
index 34175f2ec6c..04cb074c016 100644
--- a/cpp/src/strings/replace/replace_re.cu
+++ b/cpp/src/strings/replace/replace_re.cu
@@ -106,7 +106,7 @@ std::unique_ptr<column> replace_re(
   string_scalar const& replacement,
   std::optional<size_type> max_replace_count,
   regex_flags const flags,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   if (input.is_empty()) return make_empty_column(type_id::STRING);
diff --git a/cpp/src/strings/strings_column_factories.cu b/cpp/src/strings/strings_column_factories.cu
index 8c5916283be..ca30eb3f6d8 100644
--- a/cpp/src/strings/strings_column_factories.cu
+++ b/cpp/src/strings/strings_column_factories.cu
@@ -61,7 +61,7 @@ std::unique_ptr<column> make_strings_column(
   device_span<size_type> offsets,
   size_type null_count,
   rmm::device_buffer&& null_mask,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
+  rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
 {
   CUDF_FUNC_RANGE();
diff --git a/cpp/src/structs/utilities.cpp b/cpp/src/structs/utilities.cpp
index bf4216b6983..c924163daf2 100644
--- a/cpp/src/structs/utilities.cpp
+++ b/cpp/src/structs/utilities.cpp
@@ -118,8 +118,8 @@ struct table_flattener {
    */
   void superimpose_nulls(table_view const& input_table)
   {
-    auto [table, null_masks]     = superimpose_parent_nulls(input_table);
-    this->input                  = table;
+    auto [table, null_masks] = superimpose_parent_nulls(input_table, cudf::get_default_stream());
+    this->input              = table;
     this->superimposed_nullmasks = std::move(null_masks);
   }
 
diff --git a/cpp/tests/bitmask/bitmask_tests.cpp b/cpp/tests/bitmask/bitmask_tests.cpp
index e4fdf2ddabb..f180ff64115 100644
--- a/cpp/tests/bitmask/bitmask_tests.cpp
+++ b/cpp/tests/bitmask/bitmask_tests.cpp
@@ -86,7 +86,8 @@ TEST_F(CountBitmaskTest, NullMask)
 rmm::device_uvector<cudf::bitmask_type> make_mask(cudf::size_type size, bool fill_valid = false)
 {
   if (!fill_valid) {
-    return cudf::detail::make_zeroed_device_uvector_sync<cudf::bitmask_type>(size);
+    return cudf::detail::make_zeroed_device_uvector_sync<cudf::bitmask_type>(
+      size, cudf::get_default_stream());
   } else {
     auto ret = rmm::device_uvector<cudf::bitmask_type>(size, cudf::get_default_stream());
     CUDF_CUDA_TRY(cudaMemsetAsync(ret.data(),
diff --git a/cpp/tests/bitmask/is_element_valid_tests.cpp b/cpp/tests/bitmask/is_element_valid_tests.cpp
index 888d0103f03..a369c179bd9 100644
--- a/cpp/tests/bitmask/is_element_valid_tests.cpp
+++ b/cpp/tests/bitmask/is_element_valid_tests.cpp
@@ -29,11 +29,11 @@ struct IsElementValidTest : public cudf::test::BaseFixture {
 TEST_F(IsElementValidTest, IsElementValidBasic)
 {
   cudf::test::fixed_width_column_wrapper<int32_t> col({1, 1, 1, 1, 1}, {1, 0, 0, 0, 1});
-  EXPECT_TRUE(cudf::detail::is_element_valid_sync(col, 0));
-  EXPECT_FALSE(cudf::detail::is_element_valid_sync(col, 1));
-  EXPECT_FALSE(cudf::detail::is_element_valid_sync(col, 2));
-  EXPECT_FALSE(cudf::detail::is_element_valid_sync(col, 3));
-  EXPECT_TRUE(cudf::detail::is_element_valid_sync(col, 4));
+  EXPECT_TRUE(cudf::detail::is_element_valid_sync(col, 0, cudf::get_default_stream()));
+  EXPECT_FALSE(cudf::detail::is_element_valid_sync(col, 1, cudf::get_default_stream()));
+  EXPECT_FALSE(cudf::detail::is_element_valid_sync(col, 2, cudf::get_default_stream()));
+  EXPECT_FALSE(cudf::detail::is_element_valid_sync(col, 3, cudf::get_default_stream()));
+  EXPECT_TRUE(cudf::detail::is_element_valid_sync(col, 4, cudf::get_default_stream()));
 }
 
 TEST_F(IsElementValidTest, IsElementValidLarge)
@@ -46,7 +46,7 @@ TEST_F(IsElementValidTest, IsElementValidLarge)
   cudf::test::fixed_width_column_wrapper<int32_t> col(val, val + num_rows, valid);
 
   for (int i = 0; i < num_rows; i++) {
-    EXPECT_EQ(cudf::detail::is_element_valid_sync(col, i), filter(i));
+    EXPECT_EQ(cudf::detail::is_element_valid_sync(col, i, cudf::get_default_stream()), filter(i));
   }
 }
 
@@ -55,16 +55,16 @@ TEST_F(IsElementValidTest, IsElementValidOffset)
   cudf::test::fixed_width_column_wrapper<int32_t> col({1, 1, 1, 1, 1}, {1, 0, 0, 0, 1});
   {
     auto offset_col = cudf::slice(col, {1, 5}).front();
-    EXPECT_FALSE(cudf::detail::is_element_valid_sync(offset_col, 0));
-    EXPECT_FALSE(cudf::detail::is_element_valid_sync(offset_col, 1));
-    EXPECT_FALSE(cudf::detail::is_element_valid_sync(offset_col, 2));
-    EXPECT_TRUE(cudf::detail::is_element_valid_sync(offset_col, 3));
+    EXPECT_FALSE(cudf::detail::is_element_valid_sync(offset_col, 0, cudf::get_default_stream()));
+    EXPECT_FALSE(cudf::detail::is_element_valid_sync(offset_col, 1, cudf::get_default_stream()));
+    EXPECT_FALSE(cudf::detail::is_element_valid_sync(offset_col, 2, cudf::get_default_stream()));
+    EXPECT_TRUE(cudf::detail::is_element_valid_sync(offset_col, 3, cudf::get_default_stream()));
   }
   {
     auto offset_col = cudf::slice(col, {2, 5}).front();
-    EXPECT_FALSE(cudf::detail::is_element_valid_sync(offset_col, 0));
-    EXPECT_FALSE(cudf::detail::is_element_valid_sync(offset_col, 1));
-    EXPECT_TRUE(cudf::detail::is_element_valid_sync(offset_col, 2));
+    EXPECT_FALSE(cudf::detail::is_element_valid_sync(offset_col, 0, cudf::get_default_stream()));
+    EXPECT_FALSE(cudf::detail::is_element_valid_sync(offset_col, 1, cudf::get_default_stream()));
+    EXPECT_TRUE(cudf::detail::is_element_valid_sync(offset_col, 2, cudf::get_default_stream()));
   }
 }
 
@@ -80,6 +80,7 @@ TEST_F(IsElementValidTest, IsElementValidOffsetLarge)
   auto offset_col = cudf::slice(col, {offset, num_rows}).front();
 
   for (int i = 0; i < offset_col.size(); i++) {
-    EXPECT_EQ(cudf::detail::is_element_valid_sync(offset_col, i), filter(i + offset));
+    EXPECT_EQ(cudf::detail::is_element_valid_sync(offset_col, i, cudf::get_default_stream()),
+              filter(i + offset));
   }
 }
diff --git a/cpp/tests/bitmask/valid_if_tests.cu b/cpp/tests/bitmask/valid_if_tests.cu
index 816a89500da..cdc453be8e4 100644
--- a/cpp/tests/bitmask/valid_if_tests.cu
+++ b/cpp/tests/bitmask/valid_if_tests.cu
@@ -40,8 +40,10 @@ struct all_null {
 
 TEST_F(ValidIfTest, EmptyRange)
 {
-  auto actual = cudf::detail::valid_if(
-    thrust::make_counting_iterator(0), thrust::make_counting_iterator(0), odds_valid{});
+  auto actual        = cudf::detail::valid_if(thrust::make_counting_iterator(0),
+                                       thrust::make_counting_iterator(0),
+                                       odds_valid{},
+                                       cudf::get_default_stream());
   auto const& buffer = actual.first;
   EXPECT_EQ(0u, buffer.size());
   EXPECT_EQ(nullptr, buffer.data());
@@ -50,18 +52,21 @@ TEST_F(ValidIfTest, EmptyRange)
 
 TEST_F(ValidIfTest, InvalidRange)
 {
-  EXPECT_THROW(
-    cudf::detail::valid_if(
-      thrust::make_counting_iterator(1), thrust::make_counting_iterator(0), odds_valid{}),
-    cudf::logic_error);
+  EXPECT_THROW(cudf::detail::valid_if(thrust::make_counting_iterator(1),
+                                      thrust::make_counting_iterator(0),
+                                      odds_valid{},
+                                      cudf::get_default_stream()),
+               cudf::logic_error);
 }
 
 TEST_F(ValidIfTest, OddsValid)
 {
   auto iter     = cudf::detail::make_counting_transform_iterator(0, odds_valid{});
   auto expected = cudf::test::detail::make_null_mask(iter, iter + 10000);
-  auto actual   = cudf::detail::valid_if(
-    thrust::make_counting_iterator(0), thrust::make_counting_iterator(10000), odds_valid{});
+  auto actual   = cudf::detail::valid_if(thrust::make_counting_iterator(0),
+                                       thrust::make_counting_iterator(10000),
+                                       odds_valid{},
+                                       cudf::get_default_stream());
   CUDF_TEST_EXPECT_EQUAL_BUFFERS(expected.data(), actual.first.data(), expected.size());
   EXPECT_EQ(5000, actual.second);
 }
@@ -70,8 +75,10 @@ TEST_F(ValidIfTest, AllValid)
 {
   auto iter     = cudf::detail::make_counting_transform_iterator(0, all_valid{});
   auto expected = cudf::test::detail::make_null_mask(iter, iter + 10000);
-  auto actual   = cudf::detail::valid_if(
-    thrust::make_counting_iterator(0), thrust::make_counting_iterator(10000), all_valid{});
+  auto actual   = cudf::detail::valid_if(thrust::make_counting_iterator(0),
+                                       thrust::make_counting_iterator(10000),
+                                       all_valid{},
+                                       cudf::get_default_stream());
   CUDF_TEST_EXPECT_EQUAL_BUFFERS(expected.data(), actual.first.data(), expected.size());
   EXPECT_EQ(0, actual.second);
 }
@@ -80,8 +87,10 @@ TEST_F(ValidIfTest, AllNull)
 {
   auto iter     = cudf::detail::make_counting_transform_iterator(0, all_null{});
   auto expected = cudf::test::detail::make_null_mask(iter, iter + 10000);
-  auto actual   = cudf::detail::valid_if(
-    thrust::make_counting_iterator(0), thrust::make_counting_iterator(10000), all_null{});
+  auto actual   = cudf::detail::valid_if(thrust::make_counting_iterator(0),
+                                       thrust::make_counting_iterator(10000),
+                                       all_null{},
+                                       cudf::get_default_stream());
   CUDF_TEST_EXPECT_EQUAL_BUFFERS(expected.data(), actual.first.data(), expected.size());
   EXPECT_EQ(10000, actual.second);
 }
diff --git a/cpp/tests/copying/detail_gather_tests.cu b/cpp/tests/copying/detail_gather_tests.cu
index 08037b78a70..a8abaa33ac3 100644
--- a/cpp/tests/copying/detail_gather_tests.cu
+++ b/cpp/tests/copying/detail_gather_tests.cu
@@ -96,7 +96,8 @@ TYPED_TEST(GatherTest, GatherDetailInvalidIndexTest)
     cudf::detail::gather(source_table,
                          gather_map,
                          cudf::out_of_bounds_policy::NULLIFY,
-                         cudf::detail::negative_index_policy::NOT_ALLOWED);
+                         cudf::detail::negative_index_policy::NOT_ALLOWED,
+                         cudf::get_default_stream());
 
   auto expect_data =
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i % 2) ? 0 : i; });
diff --git a/cpp/tests/copying/gather_list_tests.cpp b/cpp/tests/copying/gather_list_tests.cpp
index 1caecb558e2..b0a0f99361a 100644
--- a/cpp/tests/copying/gather_list_tests.cpp
+++ b/cpp/tests/copying/gather_list_tests.cpp
@@ -266,7 +266,8 @@ TYPED_TEST(GatherTestListTyped, GatherDetailInvalidIndex)
     auto results = cudf::detail::gather(source_table,
                                         gather_map,
                                         cudf::out_of_bounds_policy::NULLIFY,
-                                        cudf::detail::negative_index_policy::NOT_ALLOWED);
+                                        cudf::detail::negative_index_policy::NOT_ALLOWED,
+                                        cudf::get_default_stream());
 
     std::vector<int32_t> expected_validity{1, 0, 0, 1};
     LCW<T> expected{{{{2, 3}, {4, 5}},
diff --git a/cpp/tests/copying/gather_str_tests.cpp b/cpp/tests/copying/gather_str_tests.cpp
index 4e4e9619fbf..3db2ce399cc 100644
--- a/cpp/tests/copying/gather_str_tests.cpp
+++ b/cpp/tests/copying/gather_str_tests.cpp
@@ -86,7 +86,8 @@ TEST_F(GatherTestStr, Gather)
   auto results = cudf::detail::gather(source_table,
                                       gather_map,
                                       cudf::out_of_bounds_policy::NULLIFY,
-                                      cudf::detail::negative_index_policy::NOT_ALLOWED);
+                                      cudf::detail::negative_index_policy::NOT_ALLOWED,
+                                      cudf::get_default_stream());
 
   std::vector<const char*> h_expected;
   std::vector<int32_t> expected_validity;
@@ -116,7 +117,8 @@ TEST_F(GatherTestStr, GatherDontCheckOutOfBounds)
   auto results = cudf::detail::gather(source_table,
                                       gather_map,
                                       cudf::out_of_bounds_policy::DONT_CHECK,
-                                      cudf::detail::negative_index_policy::NOT_ALLOWED);
+                                      cudf::detail::negative_index_policy::NOT_ALLOWED,
+                                      cudf::get_default_stream());
 
   std::vector<const char*> h_expected;
   for (auto itr = h_map.begin(); itr != h_map.end(); ++itr) {
@@ -134,7 +136,8 @@ TEST_F(GatherTestStr, GatherEmptyMapStringsColumn)
   auto results = cudf::detail::gather(cudf::table_view({zero_size_strings_column}),
                                       gather_map,
                                       cudf::out_of_bounds_policy::NULLIFY,
-                                      cudf::detail::negative_index_policy::NOT_ALLOWED);
+                                      cudf::detail::negative_index_policy::NOT_ALLOWED,
+                                      cudf::get_default_stream());
   cudf::test::expect_column_empty(results->get_column(0).view());
 }
 
@@ -147,6 +150,7 @@ TEST_F(GatherTestStr, GatherZeroSizeStringsColumn)
   auto results = cudf::detail::gather(cudf::table_view({zero_size_strings_column}),
                                       gather_map,
                                       cudf::out_of_bounds_policy::NULLIFY,
-                                      cudf::detail::negative_index_policy::NOT_ALLOWED);
+                                      cudf::detail::negative_index_policy::NOT_ALLOWED,
+                                      cudf::get_default_stream());
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, results->get_column(0).view());
 }
diff --git a/cpp/tests/copying/purge_nonempty_nulls_tests.cpp b/cpp/tests/copying/purge_nonempty_nulls_tests.cpp
index 77fd3f66ee5..b917386648a 100644
--- a/cpp/tests/copying/purge_nonempty_nulls_tests.cpp
+++ b/cpp/tests/copying/purge_nonempty_nulls_tests.cpp
@@ -75,7 +75,8 @@ TEST_F(PurgeNonEmptyNullsTest, SingleLevelList)
   EXPECT_FALSE(cudf::has_nonempty_nulls(*input));
 
   // Set nullmask, post construction.
-  cudf::detail::set_null_mask(input->mutable_view().null_mask(), 2, 3, false);
+  cudf::detail::set_null_mask(
+    input->mutable_view().null_mask(), 2, 3, false, cudf::get_default_stream());
   EXPECT_TRUE(cudf::may_have_nonempty_nulls(*input));
   EXPECT_TRUE(cudf::has_nonempty_nulls(*input));
 
@@ -157,7 +158,8 @@ TEST_F(PurgeNonEmptyNullsTest, TwoLevelList)
   EXPECT_FALSE(cudf::has_nonempty_nulls(*input));
 
   // Set nullmask, post construction.
-  cudf::detail::set_null_mask(input->mutable_view().null_mask(), 3, 4, false);
+  cudf::detail::set_null_mask(
+    input->mutable_view().null_mask(), 3, 4, false, cudf::get_default_stream());
   EXPECT_TRUE(cudf::may_have_nonempty_nulls(*input));
   EXPECT_TRUE(cudf::has_nonempty_nulls(*input));
 
@@ -211,7 +213,8 @@ TEST_F(PurgeNonEmptyNullsTest, ThreeLevelList)
   EXPECT_FALSE(cudf::has_nonempty_nulls(*input));
 
   // Set nullmask, post construction.
-  cudf::detail::set_null_mask(input->mutable_view().null_mask(), 3, 4, false);
+  cudf::detail::set_null_mask(
+    input->mutable_view().null_mask(), 3, 4, false, cudf::get_default_stream());
   EXPECT_TRUE(cudf::may_have_nonempty_nulls(*input));
   EXPECT_TRUE(cudf::has_nonempty_nulls(*input));
 
@@ -264,7 +267,8 @@ TEST_F(PurgeNonEmptyNullsTest, ListOfStrings)
   EXPECT_FALSE(cudf::has_nonempty_nulls(*input));
 
   // Set nullmask, post construction.
-  cudf::detail::set_null_mask(input->mutable_view().null_mask(), 2, 3, false);
+  cudf::detail::set_null_mask(
+    input->mutable_view().null_mask(), 2, 3, false, cudf::get_default_stream());
   EXPECT_TRUE(cudf::may_have_nonempty_nulls(*input));
   EXPECT_TRUE(cudf::has_nonempty_nulls(*input));
 
@@ -350,7 +354,8 @@ TEST_F(PurgeNonEmptyNullsTest, UnsanitizedListOfUnsanitizedStrings)
   EXPECT_TRUE(cudf::has_nonempty_nulls(*lists));
 
   // Set lists nullmask, post construction.
-  cudf::detail::set_null_mask(lists->mutable_view().null_mask(), 2, 3, false);
+  cudf::detail::set_null_mask(
+    lists->mutable_view().null_mask(), 2, 3, false, cudf::get_default_stream());
   EXPECT_TRUE(cudf::may_have_nonempty_nulls(*lists));
   EXPECT_TRUE(cudf::has_nonempty_nulls(*lists));
 
diff --git a/cpp/tests/copying/segmented_gather_list_tests.cpp b/cpp/tests/copying/segmented_gather_list_tests.cpp
index e3a003c51d1..180125b7880 100644
--- a/cpp/tests/copying/segmented_gather_list_tests.cpp
+++ b/cpp/tests/copying/segmented_gather_list_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -306,7 +306,8 @@ TYPED_TEST(SegmentedGatherTest, GatherOnNonCompactedNullLists)
   auto const input = list.release();
 
   // Set non-empty list row at index 5 to null.
-  cudf::detail::set_null_mask(input->mutable_view().null_mask(), 5, 6, false);
+  cudf::detail::set_null_mask(
+    input->mutable_view().null_mask(), 5, 6, false, cudf::get_default_stream());
 
   auto const gather_map = LCW<int>{{-1, 2, 1, -4}, {0}, {-2, 1}, {0, 2, 1}, {}, {0}, {1, 2}};
   auto const expected =
diff --git a/cpp/tests/device_atomics/device_atomics_test.cu b/cpp/tests/device_atomics/device_atomics_test.cu
index fd43690dcff..43874b84114 100644
--- a/cpp/tests/device_atomics/device_atomics_test.cu
+++ b/cpp/tests/device_atomics/device_atomics_test.cu
@@ -141,8 +141,9 @@ struct AtomicsTest : public cudf::test::BaseFixture {
     result_init[4] = result_init[1];
     result_init[5] = result_init[2];
 
-    auto dev_data   = cudf::detail::make_device_uvector_sync(v);
-    auto dev_result = cudf::detail::make_device_uvector_sync(result_init);
+    auto dev_data = cudf::detail::make_device_uvector_sync(v, cudf::get_default_stream());
+    auto dev_result =
+      cudf::detail::make_device_uvector_sync(result_init, cudf::get_default_stream());
 
     if (block_size == 0) { block_size = vec_size; }
 
@@ -154,7 +155,7 @@ struct AtomicsTest : public cudf::test::BaseFixture {
         dev_result.data(), dev_data.data(), vec_size);
     }
 
-    auto host_result = cudf::detail::make_host_vector_sync(dev_result);
+    auto host_result = cudf::detail::make_host_vector_sync(dev_result, cudf::get_default_stream());
 
     CUDF_CHECK_CUDA(cudf::get_default_stream().value());
 
@@ -293,15 +294,15 @@ struct AtomicsBitwiseOpTest : public cudf::test::BaseFixture {
     exact[2] = std::accumulate(
       v.begin(), v.end(), identity[2], [](T acc, uint64_t i) { return acc ^ T(i); });
 
-    auto dev_result = cudf::detail::make_device_uvector_sync(identity);
-    auto dev_data   = cudf::detail::make_device_uvector_sync(v);
+    auto dev_result = cudf::detail::make_device_uvector_sync(identity, cudf::get_default_stream());
+    auto dev_data   = cudf::detail::make_device_uvector_sync(v, cudf::get_default_stream());
 
     if (block_size == 0) { block_size = vec_size; }
 
     gpu_atomic_bitwiseOp_test<T><<<grid_size, block_size, 0, cudf::get_default_stream().value()>>>(
       reinterpret_cast<T*>(dev_result.data()), reinterpret_cast<T*>(dev_data.data()), vec_size);
 
-    auto host_result = cudf::detail::make_host_vector_sync(dev_result);
+    auto host_result = cudf::detail::make_host_vector_sync(dev_result, cudf::get_default_stream());
 
     CUDF_CHECK_CUDA(cudf::get_default_stream().value());
 
diff --git a/cpp/tests/dictionary/search_test.cpp b/cpp/tests/dictionary/search_test.cpp
index 53eb429f4b6..8b77d71593d 100644
--- a/cpp/tests/dictionary/search_test.cpp
+++ b/cpp/tests/dictionary/search_test.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -35,7 +35,8 @@ TEST_F(DictionarySearchTest, StringsColumn)
 
   result = cudf::dictionary::get_index(dictionary, cudf::string_scalar("eee"));
   EXPECT_FALSE(result->is_valid());
-  result   = cudf::dictionary::detail::get_insert_index(dictionary, cudf::string_scalar("eee"));
+  result = cudf::dictionary::detail::get_insert_index(
+    dictionary, cudf::string_scalar("eee"), cudf::get_default_stream());
   n_result = dynamic_cast<cudf::numeric_scalar<uint32_t>*>(result.get());
   EXPECT_EQ(uint32_t{5}, n_result->value());
 }
@@ -51,7 +52,8 @@ TEST_F(DictionarySearchTest, WithNulls)
 
   result = cudf::dictionary::get_index(dictionary, cudf::numeric_scalar<int64_t>(5));
   EXPECT_FALSE(result->is_valid());
-  result = cudf::dictionary::detail::get_insert_index(dictionary, cudf::numeric_scalar<int64_t>(5));
+  result = cudf::dictionary::detail::get_insert_index(
+    dictionary, cudf::numeric_scalar<int64_t>(5), cudf::get_default_stream());
   n_result = dynamic_cast<cudf::numeric_scalar<uint32_t>*>(result.get());
   EXPECT_EQ(uint32_t{1}, n_result->value());
 }
@@ -62,7 +64,7 @@ TEST_F(DictionarySearchTest, EmptyColumn)
   cudf::numeric_scalar<int64_t> key(7);
   auto result = cudf::dictionary::get_index(dictionary, key);
   EXPECT_FALSE(result->is_valid());
-  result = cudf::dictionary::detail::get_insert_index(dictionary, key);
+  result = cudf::dictionary::detail::get_insert_index(dictionary, key, cudf::get_default_stream());
   EXPECT_FALSE(result->is_valid());
 }
 
@@ -71,5 +73,7 @@ TEST_F(DictionarySearchTest, Errors)
   cudf::test::dictionary_column_wrapper<int64_t> dictionary({1, 2, 3});
   cudf::numeric_scalar<double> key(7);
   EXPECT_THROW(cudf::dictionary::get_index(dictionary, key), cudf::logic_error);
-  EXPECT_THROW(cudf::dictionary::detail::get_insert_index(dictionary, key), cudf::logic_error);
+  EXPECT_THROW(
+    cudf::dictionary::detail::get_insert_index(dictionary, key, cudf::get_default_stream()),
+    cudf::logic_error);
 }
diff --git a/cpp/tests/fixed_point/fixed_point_tests.cu b/cpp/tests/fixed_point/fixed_point_tests.cu
index 394229b460e..ab9970dc370 100644
--- a/cpp/tests/fixed_point/fixed_point_tests.cu
+++ b/cpp/tests/fixed_point/fixed_point_tests.cu
@@ -83,7 +83,7 @@ TEST_F(FixedPointTest, DecimalXXThrustOnDevice)
   using decimal32 = fixed_point<int32_t, Radix::BASE_10>;
 
   std::vector<decimal32> vec1(1000, decimal32{1, scale_type{-2}});
-  auto d_vec1 = cudf::detail::make_device_uvector_sync(vec1);
+  auto d_vec1 = cudf::detail::make_device_uvector_sync(vec1, cudf::get_default_stream());
 
   auto const sum = thrust::reduce(rmm::exec_policy(cudf::get_default_stream()),
                                   std::cbegin(d_vec1),
@@ -96,7 +96,7 @@ TEST_F(FixedPointTest, DecimalXXThrustOnDevice)
   //       change inclusive scan to run on device (avoid copying to host)
   thrust::inclusive_scan(std::cbegin(vec1), std::cend(vec1), std::begin(vec1));
 
-  d_vec1 = cudf::detail::make_device_uvector_sync(vec1);
+  d_vec1 = cudf::detail::make_device_uvector_sync(vec1, cudf::get_default_stream());
 
   std::vector<int32_t> vec2(1000);
   std::iota(std::begin(vec2), std::end(vec2), 1);
@@ -118,7 +118,7 @@ TEST_F(FixedPointTest, DecimalXXThrustOnDevice)
                     std::begin(d_vec3),
                     cast_to_int32_fn{});
 
-  auto vec3 = cudf::detail::make_std_vector_sync(d_vec3);
+  auto vec3 = cudf::detail::make_std_vector_sync(d_vec3, cudf::get_default_stream());
 
   EXPECT_EQ(vec2, vec3);
 }
diff --git a/cpp/tests/groupby/tdigest_tests.cu b/cpp/tests/groupby/tdigest_tests.cu
index f9c907767f5..2e4a41a70f8 100644
--- a/cpp/tests/groupby/tdigest_tests.cu
+++ b/cpp/tests/groupby/tdigest_tests.cu
@@ -466,13 +466,13 @@ TEST_F(TDigestMergeTest, EmptyGroups)
   cudf::test::fixed_width_column_wrapper<int> keys{0, 0, 0, 0, 0, 0, 0};
   int const delta = 1000;
 
-  auto a = cudf::detail::tdigest::make_empty_tdigest_column();
+  auto a = cudf::detail::tdigest::make_empty_tdigest_column(cudf::get_default_stream());
   auto b = cudf::type_dispatcher(
     static_cast<column_view>(values_b).type(), tdigest_gen_grouped{}, keys, values_b, delta);
-  auto c = cudf::detail::tdigest::make_empty_tdigest_column();
+  auto c = cudf::detail::tdigest::make_empty_tdigest_column(cudf::get_default_stream());
   auto d = cudf::type_dispatcher(
     static_cast<column_view>(values_d).type(), tdigest_gen_grouped{}, keys, values_d, delta);
-  auto e = cudf::detail::tdigest::make_empty_tdigest_column();
+  auto e = cudf::detail::tdigest::make_empty_tdigest_column(cudf::get_default_stream());
 
   std::vector<column_view> cols;
   cols.push_back(*a);
diff --git a/cpp/tests/hash_map/map_test.cu b/cpp/tests/hash_map/map_test.cu
index 2895d3323b8..f38c5b3f58f 100644
--- a/cpp/tests/hash_map/map_test.cu
+++ b/cpp/tests/hash_map/map_test.cu
@@ -57,7 +57,7 @@ struct InsertTest : public cudf::test::BaseFixture {
     const size_t input_size =
       std::min(static_cast<key_type>(size), std::numeric_limits<key_type>::max());
     pairs.resize(input_size, cudf::get_default_stream());
-    map = std::move(map_type::create(compute_hash_table_size(size)));
+    map = std::move(map_type::create(compute_hash_table_size(size), cudf::get_default_stream()));
     cudf::get_default_stream().synchronize();
   }
 
diff --git a/cpp/tests/iterator/iterator_tests.cuh b/cpp/tests/iterator/iterator_tests.cuh
index 7eb2c3d70bb..894e117ba40 100644
--- a/cpp/tests/iterator/iterator_tests.cuh
+++ b/cpp/tests/iterator/iterator_tests.cuh
@@ -87,7 +87,8 @@ struct IteratorTest : public cudf::test::BaseFixture {
   {
     InputIterator d_in_last = d_in + num_items;
     EXPECT_EQ(thrust::distance(d_in, d_in_last), num_items);
-    auto dev_expected = cudf::detail::make_device_uvector_sync(expected);
+    auto dev_expected =
+      cudf::detail::make_device_uvector_sync(expected, cudf::get_default_stream());
 
     // using a temporary vector and calling transform and all_of separately is
     // equivalent to thrust::equal but compiles ~3x faster
@@ -110,7 +111,7 @@ struct IteratorTest : public cudf::test::BaseFixture {
                 rmm::device_uvector<T_output> const& dev_result,
                 const char* msg = nullptr)
   {
-    auto host_result = cudf::detail::make_host_vector_sync(dev_result);
+    auto host_result = cudf::detail::make_host_vector_sync(dev_result, cudf::get_default_stream());
 
     EXPECT_EQ(expected, host_result[0]) << msg;
   }
diff --git a/cpp/tests/iterator/value_iterator_test.cuh b/cpp/tests/iterator/value_iterator_test.cuh
index d68ec4e1471..fa931d34a0e 100644
--- a/cpp/tests/iterator/value_iterator_test.cuh
+++ b/cpp/tests/iterator/value_iterator_test.cuh
@@ -25,7 +25,7 @@ template <typename T>
 void non_null_iterator(IteratorTest<T>& testFixture)
 {
   auto host_array = cudf::test::make_type_param_vector<T>({0, 6, 0, -14, 13, 64, -13, -20, 45});
-  auto dev_array  = cudf::detail::make_device_uvector_sync(host_array);
+  auto dev_array  = cudf::detail::make_device_uvector_sync(host_array, cudf::get_default_stream());
 
   // calculate the expected value by CPU.
   thrust::host_vector<T> replaced_array(host_array);
diff --git a/cpp/tests/iterator/value_iterator_test_strings.cu b/cpp/tests/iterator/value_iterator_test_strings.cu
index 9aa18eb844f..8b4080fa493 100644
--- a/cpp/tests/iterator/value_iterator_test_strings.cu
+++ b/cpp/tests/iterator/value_iterator_test_strings.cu
@@ -30,7 +30,7 @@ auto strings_to_string_views(std::vector<std::string>& input_strings)
   std::vector<int32_t> offsets;
   std::tie(chars, offsets) = cudf::test::detail::make_chars_and_offsets(
     input_strings.begin(), input_strings.end(), all_valid);
-  auto dev_chars = cudf::detail::make_device_uvector_sync(chars);
+  auto dev_chars = cudf::detail::make_device_uvector_sync(chars, cudf::get_default_stream());
 
   // calculate the expected value by CPU. (but contains device pointers)
   thrust::host_vector<cudf::string_view> replaced_array(input_strings.size());
@@ -51,7 +51,7 @@ TEST_F(StringIteratorTest, string_view_null_iterator)
   using T = cudf::string_view;
   std::string zero("zero");
   // the char data has to be in GPU
-  auto initmsg = cudf::detail::make_device_uvector_sync(zero);
+  auto initmsg = cudf::detail::make_device_uvector_sync(zero, cudf::get_default_stream());
   T init       = T{initmsg.data(), int(initmsg.size())};
 
   // data and valid arrays
@@ -86,7 +86,7 @@ TEST_F(StringIteratorTest, string_view_no_null_iterator)
   // T init = T{"", 0};
   std::string zero("zero");
   // the char data has to be in GPU
-  auto initmsg = cudf::detail::make_device_uvector_sync(zero);
+  auto initmsg = cudf::detail::make_device_uvector_sync(zero, cudf::get_default_stream());
   T init       = T{initmsg.data(), int(initmsg.size())};
 
   // data array
@@ -110,7 +110,7 @@ TEST_F(StringIteratorTest, string_scalar_iterator)
   // T init = T{"", 0};
   std::string zero("zero");
   // the char data has to be in GPU
-  auto initmsg = cudf::detail::make_device_uvector_sync(zero);
+  auto initmsg = cudf::detail::make_device_uvector_sync(zero, cudf::get_default_stream());
   T init       = T{initmsg.data(), int(initmsg.size())};
 
   // data array
diff --git a/cpp/tests/join/join_tests.cpp b/cpp/tests/join/join_tests.cpp
index fb2eb77512c..ed4326055c6 100644
--- a/cpp/tests/join/join_tests.cpp
+++ b/cpp/tests/join/join_tests.cpp
@@ -1880,7 +1880,8 @@ TEST_F(JoinTest, Repro_StructsWithoutNullsPushedDown)
     auto fact_ints    = ints{0, 1, 2, 3, 4};
     auto fact_structs = structs{{fact_ints}, no_nulls()}.release();
     // Now set struct validity to invalidate index#3.
-    cudf::detail::set_null_mask(fact_structs->mutable_view().null_mask(), 3, 4, false);
+    cudf::detail::set_null_mask(
+      fact_structs->mutable_view().null_mask(), 3, 4, false, cudf::get_default_stream());
     // Struct row#3 is null, but Struct.child has a non-null value.
     return make_table(std::move(fact_structs));
   }();
@@ -1896,7 +1897,8 @@ TEST_F(JoinTest, Repro_StructsWithoutNullsPushedDown)
 
   // Note: Join result might not have nulls pushed down, since it's an output of gather().
   // Must superimpose parent nulls before comparisons.
-  auto [superimposed_results, _] = cudf::structs::detail::superimpose_parent_nulls(*result);
+  auto [superimposed_results, _] =
+    cudf::structs::detail::superimpose_parent_nulls(*result, cudf::get_default_stream());
 
   auto const expected = [] {
     auto fact_ints    = ints{0};
diff --git a/cpp/tests/lists/contains_tests.cpp b/cpp/tests/lists/contains_tests.cpp
index 1658843f1a4..2139103500a 100644
--- a/cpp/tests/lists/contains_tests.cpp
+++ b/cpp/tests/lists/contains_tests.cpp
@@ -227,8 +227,9 @@ TYPED_TEST(TypedContainsTest, SlicedLists)
 
   {
     // First Slice.
-    auto sliced_column_1 = cudf::detail::slice(search_space, {1, 8}).front();
-    auto search_key_one  = create_scalar_search_key<T>(1);
+    auto sliced_column_1 =
+      cudf::detail::slice(search_space, {1, 8}, cudf::get_default_stream()).front();
+    auto search_key_one = create_scalar_search_key<T>(1);
     {
       // CONTAINS
       auto result          = cudf::lists::contains(sliced_column_1, *search_key_one);
@@ -259,8 +260,9 @@ TYPED_TEST(TypedContainsTest, SlicedLists)
 
   {
     // Second Slice.
-    auto sliced_column_2 = cudf::detail::slice(search_space, {3, 10}).front();
-    auto search_key_one  = create_scalar_search_key<T>(1);
+    auto sliced_column_2 =
+      cudf::detail::slice(search_space, {3, 10}, cudf::get_default_stream()).front();
+    auto search_key_one = create_scalar_search_key<T>(1);
     {
       // CONTAINS
       auto result          = cudf::lists::contains(sliced_column_2, *search_key_one);
diff --git a/cpp/tests/lists/extract_tests.cpp b/cpp/tests/lists/extract_tests.cpp
index 34c8e044a3f..916dd121253 100644
--- a/cpp/tests/lists/extract_tests.cpp
+++ b/cpp/tests/lists/extract_tests.cpp
@@ -228,7 +228,8 @@ TYPED_TEST(ListsExtractNumericsTest, ExtractElementsFromNonCompactedNullLists)
       .release();
 
   // Set null at index 4.
-  cudf::detail::set_null_mask(input->mutable_view().null_mask(), 4, 5, false);
+  cudf::detail::set_null_mask(
+    input->mutable_view().null_mask(), 4, 5, false, cudf::get_default_stream());
 
   {
     auto result   = cudf::lists::extract_list_element(cudf::lists_column_view{*input}, 0);
diff --git a/cpp/tests/partitioning/hash_partition_test.cpp b/cpp/tests/partitioning/hash_partition_test.cpp
index 1addbca945b..abf4095e4ec 100644
--- a/cpp/tests/partitioning/hash_partition_test.cpp
+++ b/cpp/tests/partitioning/hash_partition_test.cpp
@@ -303,7 +303,8 @@ void run_fixed_width_test(size_t cols,
 
   // Make a table view of the partition numbers
   constexpr cudf::data_type dtype{cudf::type_id::INT32};
-  auto d_partitions = cudf::detail::make_device_uvector_sync(partitions);
+  auto d_partitions =
+    cudf::detail::make_device_uvector_sync(partitions, cudf::get_default_stream());
   cudf::column_view partitions_col(dtype, rows, d_partitions.data());
   cudf::table_view partitions_table({partitions_col});
 
diff --git a/cpp/tests/quantiles/percentile_approx_test.cu b/cpp/tests/quantiles/percentile_approx_test.cu
index a26f6a99ba6..b02b7d6c336 100644
--- a/cpp/tests/quantiles/percentile_approx_test.cu
+++ b/cpp/tests/quantiles/percentile_approx_test.cu
@@ -271,9 +271,11 @@ void grouped_test(data_type input_type, std::vector<std::pair<int, int>> params)
 
 std::pair<rmm::device_buffer, size_type> make_null_mask(column_view const& col)
 {
-  return cudf::detail::valid_if(thrust::make_counting_iterator<size_type>(0),
-                                thrust::make_counting_iterator<size_type>(col.size()),
-                                [] __device__(size_type i) { return i % 2 == 0; });
+  return cudf::detail::valid_if(
+    thrust::make_counting_iterator<size_type>(0),
+    thrust::make_counting_iterator<size_type>(col.size()),
+    [] __device__(size_type i) { return i % 2 == 0; },
+    cudf::get_default_stream());
 }
 
 void simple_with_nulls_test(data_type input_type, std::vector<std::pair<int, int>> params)
@@ -384,7 +386,7 @@ struct PercentileApproxTest : public cudf::test::BaseFixture {
 
 TEST_F(PercentileApproxTest, EmptyInput)
 {
-  auto empty_ = cudf::detail::tdigest::make_empty_tdigest_column();
+  auto empty_ = cudf::detail::tdigest::make_empty_tdigest_column(cudf::get_default_stream());
   cudf::test::fixed_width_column_wrapper<double> percentiles{0.0, 0.25, 0.3};
 
   std::vector<column_view> input;
@@ -428,11 +430,12 @@ TEST_F(PercentileApproxTest, EmptyPercentiles)
   auto result = cudf::percentile_approx(tdv, percentiles);
 
   cudf::test::fixed_width_column_wrapper<offset_type> offsets{0, 0, 0};
-  auto expected = cudf::make_lists_column(2,
-                                          offsets.release(),
-                                          cudf::make_empty_column(type_id::FLOAT64),
-                                          2,
-                                          cudf::detail::create_null_mask(2, mask_state::ALL_NULL));
+  auto expected = cudf::make_lists_column(
+    2,
+    offsets.release(),
+    cudf::make_empty_column(type_id::FLOAT64),
+    2,
+    cudf::detail::create_null_mask(2, mask_state::ALL_NULL, cudf::get_default_stream()));
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, *expected);
 }
diff --git a/cpp/tests/replace/replace_nulls_tests.cpp b/cpp/tests/replace/replace_nulls_tests.cpp
index ef4a9dea48c..2c751a67a63 100644
--- a/cpp/tests/replace/replace_nulls_tests.cpp
+++ b/cpp/tests/replace/replace_nulls_tests.cpp
@@ -679,7 +679,9 @@ TEST_F(ReplaceDictionaryTest, ReplaceNullsError)
   auto input_one  = cudf::dictionary::encode(input_one_w);
   auto dict_input = cudf::dictionary_column_view(input_one->view());
   auto dict_repl  = cudf::dictionary_column_view(replacement->view());
-  EXPECT_THROW(cudf::dictionary::detail::replace_nulls(dict_input, dict_repl), cudf::logic_error);
+  EXPECT_THROW(
+    cudf::dictionary::detail::replace_nulls(dict_input, dict_repl, cudf::get_default_stream()),
+    cudf::logic_error);
 }
 
 TEST_F(ReplaceDictionaryTest, ReplaceNullsEmpty)
@@ -687,7 +689,8 @@ TEST_F(ReplaceDictionaryTest, ReplaceNullsEmpty)
   cudf::test::fixed_width_column_wrapper<int64_t> input_empty_w({});
   auto input_empty = cudf::dictionary::encode(input_empty_w);
   auto dict_input  = cudf::dictionary_column_view(input_empty->view());
-  auto result      = cudf::dictionary::detail::replace_nulls(dict_input, dict_input);
+  auto result =
+    cudf::dictionary::detail::replace_nulls(dict_input, dict_input, cudf::get_default_stream());
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), input_empty->view());
 }
 
@@ -696,11 +699,12 @@ TEST_F(ReplaceDictionaryTest, ReplaceNullsNoNulls)
   cudf::test::fixed_width_column_wrapper<int8_t> input_w({1, 1, 1});
   auto input      = cudf::dictionary::encode(input_w);
   auto dict_input = cudf::dictionary_column_view(input->view());
-  auto result     = cudf::dictionary::detail::replace_nulls(dict_input, dict_input);
+  auto result =
+    cudf::dictionary::detail::replace_nulls(dict_input, dict_input, cudf::get_default_stream());
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), input->view());
 
-  result =
-    cudf::dictionary::detail::replace_nulls(dict_input, cudf::numeric_scalar<int64_t>(0, false));
+  result = cudf::dictionary::detail::replace_nulls(
+    dict_input, cudf::numeric_scalar<int64_t>(0, false), cudf::get_default_stream());
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), input->view());
 }
 
diff --git a/cpp/tests/scalar/scalar_device_view_test.cu b/cpp/tests/scalar/scalar_device_view_test.cu
index 1a0fea7219e..c7365d63e1c 100644
--- a/cpp/tests/scalar/scalar_device_view_test.cu
+++ b/cpp/tests/scalar/scalar_device_view_test.cu
@@ -130,7 +130,7 @@ TEST_F(StringScalarDeviceViewTest, Value)
 
   auto scalar_device_view = cudf::get_scalar_device_view(s);
   rmm::device_scalar<bool> result{cudf::get_default_stream()};
-  auto value_v = cudf::detail::make_device_uvector_sync(value);
+  auto value_v = cudf::detail::make_device_uvector_sync(value, cudf::get_default_stream());
 
   test_string_value<<<1, 1, 0, cudf::get_default_stream().value()>>>(
     scalar_device_view, value_v.data(), value.size(), result.data());
diff --git a/cpp/tests/strings/concatenate_tests.cpp b/cpp/tests/strings/concatenate_tests.cpp
index 1462d4dc73a..387f0f5c997 100644
--- a/cpp/tests/strings/concatenate_tests.cpp
+++ b/cpp/tests/strings/concatenate_tests.cpp
@@ -60,7 +60,7 @@ TEST_F(StringsConcatenateTest, Concatenate)
   strings_columns.push_back(strings2);
   strings_columns.push_back(strings3);
 
-  auto results = cudf::strings::detail::concatenate(strings_columns);
+  auto results = cudf::strings::detail::concatenate(strings_columns, cudf::get_default_stream());
 
   cudf::test::strings_column_wrapper expected(h_strings.begin(), h_strings.end());
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
@@ -74,7 +74,7 @@ TEST_F(StringsConcatenateTest, ZeroSizeStringsColumns)
   strings_columns.push_back(zero_size_strings_column);
   strings_columns.push_back(zero_size_strings_column);
   strings_columns.push_back(zero_size_strings_column);
-  auto results = cudf::strings::detail::concatenate(strings_columns);
+  auto results = cudf::strings::detail::concatenate(strings_columns, cudf::get_default_stream());
   cudf::test::expect_column_empty(results->view());
 }
 
@@ -107,6 +107,6 @@ TEST_F(StringsConcatenateTest, ZeroSizeStringsPlusNormal)
                                               h_strings.data() + h_strings.size());
   strings_columns.push_back(strings1);
 
-  auto results = cudf::strings::detail::concatenate(strings_columns);
+  auto results = cudf::strings::detail::concatenate(strings_columns, cudf::get_default_stream());
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, strings1);
 }
diff --git a/cpp/tests/strings/contains_tests.cpp b/cpp/tests/strings/contains_tests.cpp
index ba738f7b616..aaacc08d5fb 100644
--- a/cpp/tests/strings/contains_tests.cpp
+++ b/cpp/tests/strings/contains_tests.cpp
@@ -269,8 +269,8 @@ TEST_F(StringsContainsTests, HexTest)
   std::vector<cudf::offset_type> offsets(
     {thrust::make_counting_iterator<cudf::offset_type>(0),
      thrust::make_counting_iterator<cudf::offset_type>(0) + count + 1});
-  auto d_chars   = cudf::detail::make_device_uvector_sync(ascii_chars);
-  auto d_offsets = cudf::detail::make_device_uvector_sync(offsets);
+  auto d_chars   = cudf::detail::make_device_uvector_sync(ascii_chars, cudf::get_default_stream());
+  auto d_offsets = cudf::detail::make_device_uvector_sync(offsets, cudf::get_default_stream());
   auto input     = cudf::make_strings_column(d_chars, d_offsets);
 
   auto strings_view = cudf::strings_column_view(input->view());
diff --git a/cpp/tests/strings/factories_test.cu b/cpp/tests/strings/factories_test.cu
index c27f48a9069..818cb3cadce 100644
--- a/cpp/tests/strings/factories_test.cu
+++ b/cpp/tests/strings/factories_test.cu
@@ -78,7 +78,7 @@ TEST_F(StringsFactoriesTest, CreateColumnFromPair)
     }
     h_offsets[idx + 1] = offset;
   }
-  auto d_strings = cudf::detail::make_device_uvector_sync(strings);
+  auto d_strings = cudf::detail::make_device_uvector_sync(strings, cudf::get_default_stream());
   CUDF_CUDA_TRY(cudaMemcpy(d_buffer.data(), h_buffer.data(), memsize, cudaMemcpyHostToDevice));
   auto column = cudf::make_strings_column(d_strings);
   EXPECT_EQ(column->type(), cudf::data_type{cudf::type_id::STRING});
@@ -143,9 +143,9 @@ TEST_F(StringsFactoriesTest, CreateColumnFromOffsets)
   }
 
   std::vector<cudf::bitmask_type> h_nulls{h_null_mask};
-  auto d_buffer  = cudf::detail::make_device_uvector_sync(h_buffer);
-  auto d_offsets = cudf::detail::make_device_uvector_sync(h_offsets);
-  auto d_nulls   = cudf::detail::make_device_uvector_sync(h_nulls);
+  auto d_buffer  = cudf::detail::make_device_uvector_sync(h_buffer, cudf::get_default_stream());
+  auto d_offsets = cudf::detail::make_device_uvector_sync(h_offsets, cudf::get_default_stream());
+  auto d_nulls   = cudf::detail::make_device_uvector_sync(h_nulls, cudf::get_default_stream());
   auto column    = cudf::make_strings_column(d_buffer, d_offsets, d_nulls, null_count);
   EXPECT_EQ(column->type(), cudf::data_type{cudf::type_id::STRING});
   EXPECT_EQ(column->null_count(), null_count);
@@ -184,7 +184,8 @@ TEST_F(StringsFactoriesTest, CreateScalar)
 TEST_F(StringsFactoriesTest, EmptyStringsColumn)
 {
   rmm::device_uvector<char> d_chars{0, cudf::get_default_stream()};
-  auto d_offsets = cudf::detail::make_zeroed_device_uvector_sync<cudf::size_type>(1);
+  auto d_offsets =
+    cudf::detail::make_zeroed_device_uvector_sync<cudf::size_type>(1, cudf::get_default_stream());
   rmm::device_uvector<cudf::bitmask_type> d_nulls{0, cudf::get_default_stream()};
 
   auto results = cudf::make_strings_column(d_chars, d_offsets, d_nulls, 0);
diff --git a/cpp/tests/strings/fill_tests.cpp b/cpp/tests/strings/fill_tests.cpp
index 46f6b633dc5..ed731fe39b4 100644
--- a/cpp/tests/strings/fill_tests.cpp
+++ b/cpp/tests/strings/fill_tests.cpp
@@ -39,7 +39,8 @@ TEST_F(StringsFillTest, Fill)
     thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; }));
   cudf::strings_column_view view(strings);
   {
-    auto results = cudf::strings::detail::fill(view, 1, 5, cudf::string_scalar("zz"));
+    auto results = cudf::strings::detail::fill(
+      view, 1, 5, cudf::string_scalar("zz"), cudf::get_default_stream());
 
     std::vector<const char*> h_expected{"eee", "zz", "zz", "zz", "zz", "bbb", "ééé"};
     cudf::test::strings_column_wrapper expected(
@@ -49,7 +50,8 @@ TEST_F(StringsFillTest, Fill)
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
   }
   {
-    auto results = cudf::strings::detail::fill(view, 2, 4, cudf::string_scalar("", false));
+    auto results = cudf::strings::detail::fill(
+      view, 2, 4, cudf::string_scalar("", false), cudf::get_default_stream());
 
     std::vector<const char*> h_expected{"eee", "bb", nullptr, nullptr, "aa", "bbb", "ééé"};
     cudf::test::strings_column_wrapper expected(
@@ -59,17 +61,20 @@ TEST_F(StringsFillTest, Fill)
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
   }
   {
-    auto results = cudf::strings::detail::fill(view, 5, 5, cudf::string_scalar("zz"));
+    auto results = cudf::strings::detail::fill(
+      view, 5, 5, cudf::string_scalar("zz"), cudf::get_default_stream());
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, view.parent());
   }
   {
-    auto results = cudf::strings::detail::fill(view, 0, 7, cudf::string_scalar(""));
+    auto results =
+      cudf::strings::detail::fill(view, 0, 7, cudf::string_scalar(""), cudf::get_default_stream());
     cudf::test::strings_column_wrapper expected({"", "", "", "", "", "", ""},
                                                 {1, 1, 1, 1, 1, 1, 1});
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
   }
   {
-    auto results = cudf::strings::detail::fill(view, 0, 7, cudf::string_scalar("", false));
+    auto results = cudf::strings::detail::fill(
+      view, 0, 7, cudf::string_scalar("", false), cudf::get_default_stream());
     cudf::test::strings_column_wrapper expected({"", "", "", "", "", "", ""},
                                                 {0, 0, 0, 0, 0, 0, 0});
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
@@ -80,8 +85,11 @@ TEST_F(StringsFillTest, ZeroSizeStringsColumns)
 {
   cudf::column_view zero_size_strings_column(
     cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0);
-  auto results = cudf::strings::detail::fill(
-    cudf::strings_column_view(zero_size_strings_column), 0, 1, cudf::string_scalar(""));
+  auto results = cudf::strings::detail::fill(cudf::strings_column_view(zero_size_strings_column),
+                                             0,
+                                             1,
+                                             cudf::string_scalar(""),
+                                             cudf::get_default_stream());
   cudf::test::expect_column_empty(results->view());
 }
 
@@ -94,6 +102,10 @@ TEST_F(StringsFillTest, FillRangeError)
     thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; }));
   cudf::strings_column_view view(strings);
 
-  EXPECT_THROW(cudf::strings::detail::fill(view, 5, 1, cudf::string_scalar("")), cudf::logic_error);
-  EXPECT_THROW(cudf::strings::detail::fill(view, 5, 9, cudf::string_scalar("")), cudf::logic_error);
+  EXPECT_THROW(
+    cudf::strings::detail::fill(view, 5, 1, cudf::string_scalar(""), cudf::get_default_stream()),
+    cudf::logic_error);
+  EXPECT_THROW(
+    cudf::strings::detail::fill(view, 5, 9, cudf::string_scalar(""), cudf::get_default_stream()),
+    cudf::logic_error);
 }
diff --git a/cpp/tests/strings/integers_tests.cpp b/cpp/tests/strings/integers_tests.cpp
index e938eec8b3e..52c3638d338 100644
--- a/cpp/tests/strings/integers_tests.cpp
+++ b/cpp/tests/strings/integers_tests.cpp
@@ -297,8 +297,8 @@ TYPED_TEST(StringsIntegerConvertTest, FromToInteger)
   std::iota(h_integers.begin(), h_integers.end(), -(TypeParam)(h_integers.size() / 2));
   h_integers.push_back(std::numeric_limits<TypeParam>::min());
   h_integers.push_back(std::numeric_limits<TypeParam>::max());
-  auto d_integers    = cudf::detail::make_device_uvector_sync(h_integers);
-  auto integers      = cudf::make_numeric_column(cudf::data_type{cudf::type_to_id<TypeParam>()},
+  auto d_integers = cudf::detail::make_device_uvector_sync(h_integers, cudf::get_default_stream());
+  auto integers   = cudf::make_numeric_column(cudf::data_type{cudf::type_to_id<TypeParam>()},
                                             (cudf::size_type)d_integers.size());
   auto integers_view = integers->mutable_view();
   CUDF_CUDA_TRY(cudaMemcpy(integers_view.data<TypeParam>(),
@@ -311,7 +311,7 @@ TYPED_TEST(StringsIntegerConvertTest, FromToInteger)
   auto results_strings = cudf::strings::from_integers(integers->view());
 
   // copy back to host
-  h_integers = cudf::detail::make_host_vector_sync(d_integers);
+  h_integers = cudf::detail::make_host_vector_sync(d_integers, cudf::get_default_stream());
   std::vector<std::string> h_strings;
   for (auto itr = h_integers.begin(); itr != h_integers.end(); ++itr)
     h_strings.push_back(std::to_string(*itr));
diff --git a/cpp/tests/structs/utilities_tests.cpp b/cpp/tests/structs/utilities_tests.cpp
index d56b2160ca3..29f801eeaa4 100644
--- a/cpp/tests/structs/utilities_tests.cpp
+++ b/cpp/tests/structs/utilities_tests.cpp
@@ -344,7 +344,7 @@ void test_non_struct_columns(cudf::column_view const& input)
 {
   // superimpose_parent_nulls() on non-struct columns should return the input column, unchanged.
   auto [superimposed, backing_validity_buffers] =
-    cudf::structs::detail::superimpose_parent_nulls(input);
+    cudf::structs::detail::superimpose_parent_nulls(input, cudf::get_default_stream());
 
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(input, superimposed);
   EXPECT_TRUE(backing_validity_buffers.empty());
@@ -395,7 +395,7 @@ TYPED_TEST(TypedSuperimposeTest, BasicStruct)
 
   // Reset STRUCTs' null-mask. Mark first STRUCT row as null.
   auto structs_view = structs_input->mutable_view();
-  cudf::detail::set_null_mask(structs_view.null_mask(), 0, 1, false);
+  cudf::detail::set_null_mask(structs_view.null_mask(), 0, 1, false, cudf::get_default_stream());
 
   // At this point, the STRUCT nulls aren't pushed down to members,
   // even though the parent null-mask was modified.
@@ -404,7 +404,8 @@ TYPED_TEST(TypedSuperimposeTest, BasicStruct)
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(
     structs_view.child(1), make_lists_member<T>(cudf::test::iterators::nulls_at({4, 5})));
 
-  auto [output, backing_buffers] = cudf::structs::detail::superimpose_parent_nulls(structs_view);
+  auto [output, backing_buffers] =
+    cudf::structs::detail::superimpose_parent_nulls(structs_view, cudf::get_default_stream());
 
   // After superimpose_parent_nulls(), the struct nulls (i.e. at index-0) should have been pushed
   // down to the children. All members should have nulls at row-index 0.
@@ -429,8 +430,8 @@ TYPED_TEST(TypedSuperimposeTest, NonNullableParentStruct)
                                                           cudf::test::iterators::no_nulls()}
                          .release();
 
-  auto [output, backing_buffers] =
-    cudf::structs::detail::superimpose_parent_nulls(structs_input->view());
+  auto [output, backing_buffers] = cudf::structs::detail::superimpose_parent_nulls(
+    structs_input->view(), cudf::get_default_stream());
 
   // After superimpose_parent_nulls(), none of the child structs should have changed,
   // because the parent had no nulls to begin with.
@@ -459,13 +460,13 @@ TYPED_TEST(TypedSuperimposeTest, NestedStruct_ChildNullable_ParentNonNullable)
 
   // Reset STRUCTs' null-mask. Mark first STRUCT row as null.
   auto structs_view = outer_struct_members.back()->mutable_view();
-  cudf::detail::set_null_mask(structs_view.null_mask(), 0, 1, false);
+  cudf::detail::set_null_mask(structs_view.null_mask(), 0, 1, false, cudf::get_default_stream());
 
   auto structs_of_structs =
     cudf::test::structs_column_wrapper{std::move(outer_struct_members)}.release();
 
-  auto [output, backing_buffers] =
-    cudf::structs::detail::superimpose_parent_nulls(structs_of_structs->view());
+  auto [output, backing_buffers] = cudf::structs::detail::superimpose_parent_nulls(
+    structs_of_structs->view(), cudf::get_default_stream());
 
   // After superimpose_parent_nulls(), outer-struct column should not have pushed nulls to child
   // structs. But the child struct column must push its nulls to its own children.
@@ -496,7 +497,7 @@ TYPED_TEST(TypedSuperimposeTest, NestedStruct_ChildNullable_ParentNullable)
   // Reset STRUCTs' null-mask. Mark first STRUCT row as null.
   auto structs_view = outer_struct_members.back()->mutable_view();
   auto num_rows     = structs_view.size();
-  cudf::detail::set_null_mask(structs_view.null_mask(), 0, 1, false);
+  cudf::detail::set_null_mask(structs_view.null_mask(), 0, 1, false, cudf::get_default_stream());
 
   auto structs_of_structs = cudf::test::structs_column_wrapper{std::move(outer_struct_members),
                                                                std::vector<bool>(num_rows, true)}
@@ -504,10 +505,11 @@ TYPED_TEST(TypedSuperimposeTest, NestedStruct_ChildNullable_ParentNullable)
 
   // Modify STRUCT-of-STRUCT's null-mask. Mark second STRUCT row as null.
   auto structs_of_structs_view = structs_of_structs->mutable_view();
-  cudf::detail::set_null_mask(structs_of_structs_view.null_mask(), 1, 2, false);
+  cudf::detail::set_null_mask(
+    structs_of_structs_view.null_mask(), 1, 2, false, cudf::get_default_stream());
 
-  auto [output, backing_buffers] =
-    cudf::structs::detail::superimpose_parent_nulls(structs_of_structs->view());
+  auto [output, backing_buffers] = cudf::structs::detail::superimpose_parent_nulls(
+    structs_of_structs->view(), cudf::get_default_stream());
 
   // After superimpose_parent_nulls(), outer-struct column should not have pushed nulls to child
   // structs. But the child struct column must push its nulls to its own children.
@@ -528,7 +530,8 @@ cudf::column_view slice_off_first_and_last_rows(cudf::column_view const& col)
 
 void mark_row_as_null(cudf::mutable_column_view const& col, cudf::size_type row_index)
 {
-  cudf::detail::set_null_mask(col.null_mask(), row_index, row_index + 1, false);
+  cudf::detail::set_null_mask(
+    col.null_mask(), row_index, row_index + 1, false, cudf::get_default_stream());
 }
 
 TYPED_TEST(TypedSuperimposeTest, Struct_Sliced)
@@ -561,7 +564,8 @@ TYPED_TEST(TypedSuperimposeTest, Struct_Sliced)
   // nums_member:  11011
   // lists_member: 00111
 
-  auto [output, backing_buffers] = cudf::structs::detail::superimpose_parent_nulls(sliced_structs);
+  auto [output, backing_buffers] =
+    cudf::structs::detail::superimpose_parent_nulls(sliced_structs, cudf::get_default_stream());
 
   // After superimpose_parent_nulls(), the null masks should be:
   // STRUCT:       11110
@@ -613,7 +617,8 @@ TYPED_TEST(TypedSuperimposeTest, NestedStruct_Sliced)
   // nums_member:    11010
   // lists_member:   00110
 
-  auto [output, backing_buffers] = cudf::structs::detail::superimpose_parent_nulls(sliced_structs);
+  auto [output, backing_buffers] =
+    cudf::structs::detail::superimpose_parent_nulls(sliced_structs, cudf::get_default_stream());
 
   // After superimpose_parent_nulls(), the null masks will be:
   // STRUCT<STRUCT>: 11101
diff --git a/cpp/tests/table/table_view_tests.cu b/cpp/tests/table/table_view_tests.cu
index a092006bda6..0542d007ca0 100644
--- a/cpp/tests/table/table_view_tests.cu
+++ b/cpp/tests/table/table_view_tests.cu
@@ -47,7 +47,8 @@ void row_comparison(cudf::table_view input1,
 
   auto device_table_1 = cudf::table_device_view::create(input1, stream);
   auto device_table_2 = cudf::table_device_view::create(input2, stream);
-  auto d_column_order = cudf::detail::make_device_uvector_sync(column_order);
+  auto d_column_order =
+    cudf::detail::make_device_uvector_sync(column_order, cudf::get_default_stream());
 
   auto comparator = cudf::row_lexicographic_comparator(
     cudf::nullate::NO{}, *device_table_1, *device_table_2, d_column_order.data());
diff --git a/cpp/tests/types/type_dispatcher_test.cu b/cpp/tests/types/type_dispatcher_test.cu
index e3856759cfc..911911851f2 100644
--- a/cpp/tests/types/type_dispatcher_test.cu
+++ b/cpp/tests/types/type_dispatcher_test.cu
@@ -69,7 +69,7 @@ __global__ void dispatch_test_kernel(cudf::type_id id, bool* d_result)
 
 TYPED_TEST(TypedDispatcherTest, DeviceDispatch)
 {
-  auto result = cudf::detail::make_zeroed_device_uvector_sync<bool>(1);
+  auto result = cudf::detail::make_zeroed_device_uvector_sync<bool>(1, cudf::get_default_stream());
   dispatch_test_kernel<<<1, 1, 0, cudf::get_default_stream().value()>>>(
     cudf::type_to_id<TypeParam>(), result.data());
   CUDF_CUDA_TRY(cudaDeviceSynchronize());
@@ -130,7 +130,7 @@ __global__ void double_dispatch_test_kernel(cudf::type_id id1, cudf::type_id id2
 
 TYPED_TEST(TypedDoubleDispatcherTest, DeviceDoubleDispatch)
 {
-  auto result = cudf::detail::make_zeroed_device_uvector_sync<bool>(1);
+  auto result = cudf::detail::make_zeroed_device_uvector_sync<bool>(1, cudf::get_default_stream());
   double_dispatch_test_kernel<<<1, 1, 0, cudf::get_default_stream().value()>>>(
     cudf::type_to_id<TypeParam>(), cudf::type_to_id<TypeParam>(), result.data());
   CUDF_CUDA_TRY(cudaDeviceSynchronize());
diff --git a/cpp/tests/utilities/column_utilities.cu b/cpp/tests/utilities/column_utilities.cu
index 080bb3ef916..417d4b4a6fc 100644
--- a/cpp/tests/utilities/column_utilities.cu
+++ b/cpp/tests/utilities/column_utilities.cu
@@ -495,7 +495,7 @@ std::string stringify_column_differences(cudf::device_span<int const> difference
   CUDF_EXPECTS(not differences.empty(), "Shouldn't enter this function if `differences` is empty");
   std::string const depth_str = depth > 0 ? "depth " + std::to_string(depth) + '\n' : "";
   // move the differences to the host.
-  auto h_differences = cudf::detail::make_host_vector_sync(differences);
+  auto h_differences = cudf::detail::make_host_vector_sync(differences, cudf::get_default_stream());
   if (verbosity == debug_output_level::ALL_ERRORS) {
     std::ostringstream buffer;
     buffer << depth_str << "differences:" << std::endl;
@@ -976,7 +976,8 @@ std::string nested_offsets_to_string(NestedColumnView const& c, std::string cons
     shifted_offsets.begin(),
     [first] __device__(int32_t offset) { return static_cast<size_type>(offset - first); });
 
-  auto const h_shifted_offsets = cudf::detail::make_host_vector_sync(shifted_offsets);
+  auto const h_shifted_offsets =
+    cudf::detail::make_host_vector_sync(shifted_offsets, cudf::get_default_stream());
   std::ostringstream buffer;
   for (size_t idx = 0; idx < h_shifted_offsets.size(); idx++) {
     buffer << h_shifted_offsets[idx];
diff --git a/java/src/main/native/src/ColumnVectorJni.cpp b/java/src/main/native/src/ColumnVectorJni.cpp
index f01d832eb19..39d58f896ea 100644
--- a/java/src/main/native/src/ColumnVectorJni.cpp
+++ b/java/src/main/native/src/ColumnVectorJni.cpp
@@ -307,8 +307,9 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnVector_concatenate(JNIEnv *env
     auto columns =
         cudf::jni::native_jpointerArray<column_view>{env, column_handles}.get_dereferenced();
     auto const is_lists_column = columns[0].type().id() == cudf::type_id::LIST;
-    return release_as_jlong(is_lists_column ? cudf::lists::detail::concatenate(columns) :
-                                              cudf::concatenate(columns));
+    return release_as_jlong(
+        is_lists_column ? cudf::lists::detail::concatenate(columns, cudf::get_default_stream()) :
+                          cudf::concatenate(columns));
   }
   CATCH_STD(env, 0);
 }
diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp
index 716a9f0b834..e2a96de93ef 100644
--- a/java/src/main/native/src/ColumnViewJni.cpp
+++ b/java/src/main/native/src/ColumnViewJni.cpp
@@ -1322,8 +1322,8 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_binaryOpVV(JNIEnv *env, j
       }
 
       auto out_view = out->mutable_view();
-      cudf::binops::compiled::detail::apply_sorting_struct_binary_op(out_view, *lhs, *rhs, false,
-                                                                     false, op);
+      cudf::binops::compiled::detail::apply_sorting_struct_binary_op(
+          out_view, *lhs, *rhs, false, false, op, cudf::get_default_stream());
       return release_as_jlong(out);
     }
 
@@ -1369,8 +1369,8 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_binaryOpVS(JNIEnv *env, j
 
       auto rhsv = cudf::make_column_from_scalar(*rhs, 1);
       auto out_view = out->mutable_view();
-      cudf::binops::compiled::detail::apply_sorting_struct_binary_op(out_view, *lhs, rhsv->view(),
-                                                                     false, true, op);
+      cudf::binops::compiled::detail::apply_sorting_struct_binary_op(
+          out_view, *lhs, rhsv->view(), false, true, op, cudf::get_default_stream());
       return release_as_jlong(out);
     }
 
diff --git a/java/src/main/native/src/ColumnViewJni.cu b/java/src/main/native/src/ColumnViewJni.cu
index a3f9ab5928d..f52e98b90b9 100644
--- a/java/src/main/native/src/ColumnViewJni.cu
+++ b/java/src/main/native/src/ColumnViewJni.cu
@@ -52,10 +52,10 @@ new_column_with_boolean_column_as_validity(cudf::column_view const &exemplar,
   auto validity_begin = cudf::detail::make_optional_iterator<bool>(
       *validity_device_view, cudf::nullate::DYNAMIC{validity_column.has_nulls()});
   auto validity_end = validity_begin + validity_device_view->size();
-  auto [null_mask, null_count] =
-      cudf::detail::valid_if(validity_begin, validity_end, [] __device__(auto optional_bool) {
-        return optional_bool.value_or(false);
-      });
+  auto [null_mask, null_count] = cudf::detail::valid_if(
+      validity_begin, validity_end,
+      [] __device__(auto optional_bool) { return optional_bool.value_or(false); },
+      cudf::get_default_stream());
   auto const exemplar_without_null_mask = cudf::column_view{
       exemplar.type(),
       exemplar.size(),
@@ -152,8 +152,8 @@ void post_process_list_overlap(cudf::column_view const &lhs, cudf::column_view c
                    });
 
   // Create a new nullmask from the validity data.
-  auto [new_null_mask, new_null_count] =
-      cudf::detail::valid_if(validity.begin(), validity.end(), thrust::identity{});
+  auto [new_null_mask, new_null_count] = cudf::detail::valid_if(
+      validity.begin(), validity.end(), thrust::identity{}, cudf::get_default_stream());
 
   if (new_null_count > 0) {
     // If the `overlap_result` column is nullable, perform `bitmask_and` of its nullmask and the
diff --git a/java/src/main/native/src/ScalarJni.cpp b/java/src/main/native/src/ScalarJni.cpp
index b44d2604882..e47728f6acc 100644
--- a/java/src/main/native/src/ScalarJni.cpp
+++ b/java/src/main/native/src/ScalarJni.cpp
@@ -514,8 +514,8 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Scalar_binaryOpSV(JNIEnv *env, jclas
 
       auto lhs_col = cudf::make_column_from_scalar(*lhs, 1);
       auto out_view = out->mutable_view();
-      cudf::binops::compiled::detail::apply_sorting_struct_binary_op(out_view, lhs_col->view(),
-                                                                     *rhs, true, false, op);
+      cudf::binops::compiled::detail::apply_sorting_struct_binary_op(
+          out_view, lhs_col->view(), *rhs, true, false, op, cudf::get_default_stream());
       return release_as_jlong(out);
     }
 

From 646a7e331c64c53d2b39236b057528aea3122e97 Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Wed, 26 Oct 2022 18:55:04 -0400
Subject: [PATCH 082/202] Fix doxygen text for cudf::dictionary::encode
 (#11991)

Fixes the example code in the doxygen comment for `cudf::dictionary::encode` to use the correct API name.
No function has changed -- just code comments that generate public doxygen content.
https://docs.rapids.ai/api/libcudf/stable/group__dictionary__encode.html#ga06997026d694784d613f4590563a8b33

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Karthikeyan (https://github.com/karthikeyann)
  - Bradley Dice (https://github.com/bdice)
  - Mike Wilson (https://github.com/hyperbolic2346)

URL: https://github.com/rapidsai/cudf/pull/11991
---
 cpp/include/cudf/dictionary/encode.hpp | 32 +++++++++++++-------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/cpp/include/cudf/dictionary/encode.hpp b/cpp/include/cudf/dictionary/encode.hpp
index 50b81187091..fb13eabe11a 100644
--- a/cpp/include/cudf/dictionary/encode.hpp
+++ b/cpp/include/cudf/dictionary/encode.hpp
@@ -31,7 +31,7 @@ namespace dictionary {
  */
 
 /**
- * @brief Construct a dictionary column by dictionary encoding an existing column.
+ * @brief Construct a dictionary column by dictionary encoding an existing column
  *
  * The output column is a DICTIONARY type with a keys column of non-null, unique values
  * that are in a strict, total order. Meaning, `keys[i]` is _ordered before
@@ -40,21 +40,21 @@ namespace dictionary {
  * The output column has a child indices column that is of integer type and with
  * the same size as the input column.
  *
- * The null_mask and null count are copied from the input column to the output column.
+ * The null mask and null count are copied from the input column to the output column.
  *
- * @throw cudf::logic_error if indices type is not an unsigned integer type.
- * @throw cudf::logic_error if the column to encode is already a DICTIONARY type.
+ * @throw cudf::logic_error if indices type is not an unsigned integer type
+ * @throw cudf::logic_error if the column to encode is already a DICTIONARY type
  *
  * @code{.pseudo}
- * c = [429,111,213,111,213,429,213]
- * d = make_dictionary_column(c)
- * d now has keys [111,213,429] and indices [2,0,1,0,1,2,1]
+ * c = [429, 111, 213, 111, 213, 429, 213]
+ * d = encode(c)
+ * d now has keys [111, 213, 429] and indices [2, 0, 1, 0, 1, 2, 1]
  * @endcode
  *
- * @param column The column to dictionary encode.
- * @param indices_type The integer type to use for the indices.
- * @param mr Device memory resource used to allocate the returned column's device memory.
- * @return Returns a dictionary column.
+ * @param column The column to dictionary encode
+ * @param indices_type The integer type to use for the indices
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return Returns a dictionary column
  */
 std::unique_ptr<column> encode(
   column_view const& column,
@@ -66,14 +66,14 @@ std::unique_ptr<column> encode(
  * dictionary_column into a new column using the indices from that column.
  *
  * @code{.pseudo}
- * d1 = {["a","c","d"],[2,0,1,0]}
+ * d1 = {["a", "c", "d"], [2, 0, 1, 0]}
  * s = decode(d1)
- * s is now ["d","a","c","a"]
+ * s is now ["d", "a", "c", "a"]
  * @endcode
  *
- * @param dictionary_column Existing dictionary column.
- * @param mr Device memory resource used to allocate the returned column's device memory.
- * @return New column with type matching the dictionary_column's keys.
+ * @param dictionary_column Existing dictionary column
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return New column with type matching the dictionary_column's keys
  */
 std::unique_ptr<column> decode(
   dictionary_column_view const& dictionary_column,

From cd21ce79274318c2d4e9d95648e311aa4fe8b02c Mon Sep 17 00:00:00 2001
From: "Richard (Rick) Zamora" <rzamora217@gmail.com>
Date: Thu, 27 Oct 2022 12:04:39 -0500
Subject: [PATCH 083/202] Remove unnecessary code from dask-cudf _Frame
 (#12001)

Removes unnecessary code from `dask_cudf.core._Frame` that is already handled in the super-class (`dask.dataframe.core._Frame`). By removing the unnecessary `__init__` logic from `dask_cudf`, we can avoid breakages from upstream changes like https://github.com/dask/dask/pull/9473.

Authors:
  - Richard (Rick) Zamora (https://github.com/rjzamora)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/12001
---
 python/dask_cudf/dask_cudf/core.py | 32 ++----------------------------
 1 file changed, 2 insertions(+), 30 deletions(-)

diff --git a/python/dask_cudf/dask_cudf/core.py b/python/dask_cudf/dask_cudf/core.py
index 76705e7cbf1..04b6ff401dc 100644
--- a/python/dask_cudf/dask_cudf/core.py
+++ b/python/dask_cudf/dask_cudf/core.py
@@ -14,7 +14,6 @@
 from dask.base import normalize_token, tokenize
 from dask.dataframe.core import (
     Scalar,
-    finalize,
     handle_out,
     make_meta as dask_make_meta,
     map_partitions,
@@ -55,35 +54,8 @@ class _Frame(dd.core._Frame, OperatorMethodMixin):
         Values along which we partition our blocks on the index
     """
 
-    __dask_scheduler__ = staticmethod(dask.get)
-
-    def __dask_postcompute__(self):
-        return finalize, ()
-
-    def __dask_postpersist__(self):
-        return type(self), (self._name, self._meta, self.divisions)
-
-    @_dask_cudf_nvtx_annotate
-    def __init__(self, dsk, name, meta, divisions):
-        if not isinstance(dsk, HighLevelGraph):
-            dsk = HighLevelGraph.from_collections(name, dsk, dependencies=[])
-        self.dask = dsk
-        self._name = name
-        meta = dask_make_meta(meta)
-        if not isinstance(meta, self._partition_type):
-            raise TypeError(
-                f"Expected meta to specify type "
-                f"{self._partition_type.__name__}, got type "
-                f"{type(meta).__name__}"
-            )
-        self._meta = meta
-        self.divisions = tuple(divisions)
-
-    def __getstate__(self):
-        return (self.dask, self._name, self._meta, self.divisions)
-
-    def __setstate__(self, state):
-        self.dask, self._name, self._meta, self.divisions = state
+    def _is_partition_type(self, meta):
+        return isinstance(meta, self._partition_type)
 
     def __repr__(self):
         s = "<dask_cudf.%s | %d tasks | %d npartitions>"

From 8d49db5bbd959d8b1ea28cbebf896e6e911716f5 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Thu, 27 Oct 2022 13:21:38 -0500
Subject: [PATCH 084/202] Ignore python docs build artifacts (#12000)

This PR gitignores some of the python docs build artifacts that keep showing up in `git status`

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/12000
---
 .gitignore                              | 2 ++
 docs/cudf/source/user_guide/10min.ipynb | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index aaac92ff643..91a7ecc49f7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -165,3 +165,5 @@ dask-worker-space/
 # Sphinx docs & build artifacts
 docs/cudf/source/api_docs/generated/*
 docs/cudf/source/api_docs/api/*
+docs/cudf/source/user_guide/example_output/*
+docs/cudf/source/user_guide/cudf.*Dtype.*.rst
diff --git a/docs/cudf/source/user_guide/10min.ipynb b/docs/cudf/source/user_guide/10min.ipynb
index b9278151e64..ce6c55fe134 100644
--- a/docs/cudf/source/user_guide/10min.ipynb
+++ b/docs/cudf/source/user_guide/10min.ipynb
@@ -5474,7 +5474,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Writing to parquet files, using the CPU via PyArrow."
+    "Writing to parquet files with GPU-accelerated parquet writer"
    ]
   },
   {
@@ -5749,7 +5749,7 @@
     }
    ],
    "source": [
-    "ddf.to_parquet('example_files')  "
+    "ddf.to_parquet('example_output/ddf_parquet_files')"
    ]
   },
   {

From b4ca89492b5e4bf8d54102a4082c2d362350e783 Mon Sep 17 00:00:00 2001
From: Tobias Ribizel <ribizel@kit.edu>
Date: Thu, 27 Oct 2022 20:26:22 +0200
Subject: [PATCH 085/202] Add `strip_delimiters` option to `read_text` (#11946)

This adds a `strip_delimiters` post-processing option to `read_text`. I needed to implement some lightweight stripping because a thread-per-row parallelization of the string gather gave pretty bad performance.

For consistency, I also removed the special-case handling of delimiters at the end (previously adding an empty row), to match the read_csv behavior.

Benchmark results:

```
benchmarks/MULTIBYTE_SPLIT_NVBENCH --axis size_approx[pow2]=30 --axis byte_range_percent=100 --axis T=device --axis delim_size=4
```

### [0] Tesla T4

|   T    | strip_delimiters | delim_percent |    size_approx    |   CPU Time  | Noise | Peak Memory Usage | Encoded file size |
|--------|------------------|---------------|-------------------|------------|-------|-------------------|-------------------|
| device |                0 |             1 | 2^30 = 1073741824 | 178.133 ms | 0.36% |         3.709 GiB |      1014.442 MiB |
| device |                1 |             1 | 2^30 = 1073741824 | 188.328 ms | 0.31% |         4.690 GiB |      1014.442 MiB |
| device |                0 |            25 | 2^30 = 1073741824 | 206.188 ms | 0.03% |         5.292 GiB |       953.075 MiB |
| device |                1 |            25 | 2^30 = 1073741824 | 242.534 ms | 0.50% |         5.975 GiB |       953.075 MiB |

Closes #11625

Authors:
  - Tobias Ribizel (https://github.com/upsj)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/11946
---
 cpp/benchmarks/io/text/multibyte_split.cpp   |  5 +-
 cpp/include/cudf/io/text/multibyte_split.hpp | 30 +++++-
 cpp/src/io/text/multibyte_split.cu           | 48 ++++++++--
 cpp/tests/io/text/multibyte_split_test.cpp   | 98 +++++++++++++++++++-
 python/cudf/cudf/_lib/cpp/io/text.pxd        | 10 +-
 python/cudf/cudf/_lib/text.pyx               | 22 +++--
 python/cudf/cudf/io/text.py                  |  2 +
 python/cudf/cudf/tests/test_text.py          | 24 ++++-
 python/cudf/cudf/utils/ioutils.py            |  7 ++
 9 files changed, 212 insertions(+), 34 deletions(-)

diff --git a/cpp/benchmarks/io/text/multibyte_split.cpp b/cpp/benchmarks/io/text/multibyte_split.cpp
index 380766fee46..c0e82b34623 100644
--- a/cpp/benchmarks/io/text/multibyte_split.cpp
+++ b/cpp/benchmarks/io/text/multibyte_split.cpp
@@ -121,6 +121,7 @@ static void bench_multibyte_split(nvbench::state& state,
   auto const delim_percent      = state.get_int64("delim_percent");
   auto const file_size_approx   = state.get_int64("size_approx");
   auto const byte_range_percent = state.get_int64("byte_range_percent");
+  auto const strip_delimiters   = bool(state.get_int64("strip_delimiters"));
 
   auto const byte_range_factor = static_cast<double>(byte_range_percent) / 100;
   CUDF_EXPECTS(delim_percent >= 1, "delimiter percent must be at least 1");
@@ -182,12 +183,13 @@ static void bench_multibyte_split(nvbench::state& state,
   auto const range_size   = static_cast<int64_t>(device_input.size() * byte_range_factor);
   auto const range_offset = (device_input.size() - range_size) / 2;
   cudf::io::text::byte_range_info range{range_offset, range_size};
+  cudf::io::text::parse_options options{range, strip_delimiters};
   std::unique_ptr<cudf::column> output;
 
   state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
   state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
     try_drop_l3_cache();
-    output = cudf::io::text::multibyte_split(*source, delim, range);
+    output = cudf::io::text::multibyte_split(*source, delim, options);
   });
 
   state.add_buffer_size(mem_stats_logger.peak_memory_usage(), "pmu", "Peak Memory Usage");
@@ -203,6 +205,7 @@ using source_type_list = nvbench::enum_type_list<data_chunk_source_type::device,
 
 NVBENCH_BENCH_TYPES(bench_multibyte_split, NVBENCH_TYPE_AXES(source_type_list))
   .set_name("multibyte_split")
+  .add_int64_axis("strip_delimiters", {0, 1})
   .add_int64_axis("delim_size", {1, 4, 7})
   .add_int64_axis("delim_percent", {1, 25})
   .add_int64_power_of_two_axis("size_approx", {15, 30})
diff --git a/cpp/include/cudf/io/text/multibyte_split.hpp b/cpp/include/cudf/io/text/multibyte_split.hpp
index abb966a55bf..a7edc9be0e4 100644
--- a/cpp/include/cudf/io/text/multibyte_split.hpp
+++ b/cpp/include/cudf/io/text/multibyte_split.hpp
@@ -30,11 +30,25 @@ namespace cudf {
 namespace io {
 namespace text {
 
+/**
+ * @brief Parsing options for multibyte_split.
+ */
+struct parse_options {
+  /**
+   * @brief Only rows starting inside this byte range will be part of the output column.
+   */
+  byte_range_info byte_range = create_byte_range_info_max();
+  /**
+   * @brief Whether delimiters at the end of rows should be stripped from the output column
+   */
+  bool strip_delimiters = false;
+};
+
 /**
  * @brief Splits the source text into a strings column using a multiple byte delimiter.
  *
- * Providing a byte range allows multibyte_split to read a whole file, but only return the offsets
- * of delimiters which begin within the range. If thinking in terms of "records", where each
+ * Providing a byte range allows multibyte_split to read a file partially, only returning the
+ * offsets of delimiters which begin within the range. If thinking in terms of "records", where each
  * delimiter dictates the end of a record, all records which begin within the byte range provided
  * will be returned, including any record which may begin in the range but end outside of the
  * range. Records which begin outside of the range will ignored, even if those records end inside
@@ -63,7 +77,7 @@ namespace text {
  *
  * @param source The source string
  * @param delimiter UTF-8 encoded string for which to find offsets in the source
- * @param byte_range range in which to consider offsets relevant
+ * @param options the parsing options to use (including byte range)
  * @param mr Memory resource to use for the device memory allocation
  * @return The strings found by splitting the source by the delimiter within the relevant byte
  * range.
@@ -71,8 +85,14 @@ namespace text {
 std::unique_ptr<cudf::column> multibyte_split(
   data_chunk_source const& source,
   std::string const& delimiter,
-  std::optional<byte_range_info> byte_range = std::nullopt,
-  rmm::mr::device_memory_resource* mr       = rmm::mr::get_current_device_resource());
+  parse_options options               = {},
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+std::unique_ptr<cudf::column> multibyte_split(
+  data_chunk_source const& source,
+  std::string const& delimiter,
+  std::optional<byte_range_info> byte_range,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 std::unique_ptr<cudf::column> multibyte_split(data_chunk_source const& source,
                                               std::string const& delimiter,
diff --git a/cpp/src/io/text/multibyte_split.cu b/cpp/src/io/text/multibyte_split.cu
index 29cec0e8c3f..0d699fc72fd 100644
--- a/cpp/src/io/text/multibyte_split.cu
+++ b/cpp/src/io/text/multibyte_split.cu
@@ -21,13 +21,16 @@
 
 #include <cudf/column/column.hpp>
 #include <cudf/column/column_factories.hpp>
+#include <cudf/detail/iterator.cuh>
 #include <cudf/detail/nvtx/ranges.hpp>
 #include <cudf/detail/utilities/integer_utils.hpp>
 #include <cudf/io/text/byte_range_info.hpp>
 #include <cudf/io/text/data_chunk_source.hpp>
 #include <cudf/io/text/detail/multistate.hpp>
 #include <cudf/io/text/detail/tile_state.hpp>
+#include <cudf/io/text/multibyte_split.hpp>
 #include <cudf/scalar/scalar.hpp>
+#include <cudf/strings/detail/strings_column_factories.cuh>
 #include <cudf/utilities/default_stream.hpp>
 #include <cudf/utilities/span.hpp>
 
@@ -551,6 +554,7 @@ class output_builder {
 std::unique_ptr<cudf::column> multibyte_split(cudf::io::text::data_chunk_source const& source,
                                               std::string const& delimiter,
                                               byte_range_info byte_range,
+                                              bool strip_delimiters,
                                               rmm::cuda_stream_view stream,
                                               rmm::mr::device_memory_resource* mr,
                                               rmm::cuda_stream_pool& stream_pool)
@@ -756,8 +760,12 @@ std::unique_ptr<cudf::column> multibyte_split(cudf::io::text::data_chunk_source
   auto chars          = char_storage.gather(stream, mr);
   auto global_offsets = row_offset_storage.gather(stream, mr);
 
-  bool const insert_begin = *first_row_offset == 0;
-  bool const insert_end   = not last_row_offset.has_value() or last_row_offset == chunk_offset;
+  // insert an offset at the beginning if we started at the beginning of the input
+  bool const insert_begin = first_row_offset.value_or(0) == 0;
+  // insert an offset at the end if we have not terminated the last row
+  bool const insert_end =
+    not(last_row_offset.has_value() or
+        (global_offsets.size() > 0 and global_offsets.back_element(stream) == chunk_offset));
   rmm::device_uvector<int32_t> offsets{
     global_offsets.size() + insert_begin + insert_end, stream, mr};
   if (insert_begin) { offsets.set_element_to_zero_async(0, stream); }
@@ -771,10 +779,27 @@ std::unique_ptr<cudf::column> multibyte_split(cudf::io::text::data_chunk_source
                     [baseline = *first_row_offset] __device__(byte_offset global_offset) {
                       return static_cast<int32_t>(global_offset - baseline);
                     });
-
   auto string_count = offsets.size() - 1;
-
-  return cudf::make_strings_column(string_count, std::move(offsets), std::move(chars));
+  if (strip_delimiters) {
+    auto it = cudf::detail::make_counting_transform_iterator(
+      0,
+      [ofs        = offsets.data(),
+       chars      = chars.data(),
+       delim_size = static_cast<size_type>(delimiter.size()),
+       last_row   = static_cast<size_type>(string_count) - 1,
+       insert_end] __device__(size_type row) {
+        auto const begin = ofs[row];
+        auto const len   = ofs[row + 1] - begin;
+        if (row == last_row && insert_end) {
+          return thrust::make_pair(chars + begin, len);
+        } else {
+          return thrust::make_pair(chars + begin, std::max<size_type>(0, len - delim_size));
+        };
+      });
+    return cudf::strings::detail::make_strings_column(it, it + string_count, stream, mr);
+  } else {
+    return cudf::make_strings_column(string_count, std::move(offsets), std::move(chars));
+  }
 }
 
 }  // namespace detail
@@ -783,12 +808,21 @@ std::unique_ptr<cudf::column> multibyte_split(cudf::io::text::data_chunk_source
                                               std::string const& delimiter,
                                               std::optional<byte_range_info> byte_range,
                                               rmm::mr::device_memory_resource* mr)
+{
+  return multibyte_split(
+    source, delimiter, parse_options{byte_range.value_or(create_byte_range_info_max())}, mr);
+}
+
+std::unique_ptr<cudf::column> multibyte_split(cudf::io::text::data_chunk_source const& source,
+                                              std::string const& delimiter,
+                                              parse_options options,
+                                              rmm::mr::device_memory_resource* mr)
 {
   auto stream      = cudf::get_default_stream();
   auto stream_pool = rmm::cuda_stream_pool(2);
 
   auto result = detail::multibyte_split(
-    source, delimiter, byte_range.value_or(create_byte_range_info_max()), stream, mr, stream_pool);
+    source, delimiter, options.byte_range, options.strip_delimiters, stream, mr, stream_pool);
 
   return result;
 }
@@ -797,7 +831,7 @@ std::unique_ptr<cudf::column> multibyte_split(cudf::io::text::data_chunk_source
                                               std::string const& delimiter,
                                               rmm::mr::device_memory_resource* mr)
 {
-  return multibyte_split(source, delimiter, std::nullopt, mr);
+  return multibyte_split(source, delimiter, parse_options{}, mr);
 }
 
 }  // namespace text
diff --git a/cpp/tests/io/text/multibyte_split_test.cpp b/cpp/tests/io/text/multibyte_split_test.cpp
index 43debf3d5b3..2da7073b334 100644
--- a/cpp/tests/io/text/multibyte_split_test.cpp
+++ b/cpp/tests/io/text/multibyte_split_test.cpp
@@ -62,12 +62,25 @@ TEST_F(MultibyteSplitTest, NondeterministicMatching)
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *out);
 }
 
+TEST_F(MultibyteSplitTest, NoDelimiter)
+{
+  auto delimiter  = std::string(":");
+  auto host_input = std::string("abcdefg");
+
+  auto expected = strings_column_wrapper{"abcdefg"};
+
+  auto source = cudf::io::text::make_source(host_input);
+  auto out    = cudf::io::text::multibyte_split(*source, delimiter);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *out);
+}
+
 TEST_F(MultibyteSplitTest, DelimiterAtEnd)
 {
   auto delimiter  = std::string(":");
   auto host_input = std::string("abcdefg:");
 
-  auto expected = strings_column_wrapper{"abcdefg:", ""};
+  auto expected = strings_column_wrapper{"abcdefg:"};
 
   auto source = cudf::io::text::make_source(host_input);
   auto out    = cudf::io::text::multibyte_split(*source, delimiter);
@@ -80,7 +93,7 @@ TEST_F(MultibyteSplitTest, DelimiterAtEndByteRange)
   auto delimiter  = std::string(":");
   auto host_input = std::string("abcdefg:");
 
-  auto expected = strings_column_wrapper{"abcdefg:", ""};
+  auto expected = strings_column_wrapper{"abcdefg:"};
 
   auto source = cudf::io::text::make_source(host_input);
   auto out    = cudf::io::text::multibyte_split(
@@ -91,6 +104,22 @@ TEST_F(MultibyteSplitTest, DelimiterAtEndByteRange)
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *out);
 }
 
+TEST_F(MultibyteSplitTest, DelimiterAtEndByteRange2)
+{
+  auto delimiter  = std::string(":");
+  auto host_input = std::string("abcdefg:");
+
+  auto expected = strings_column_wrapper{"abcdefg:"};
+
+  auto source = cudf::io::text::make_source(host_input);
+  auto out    = cudf::io::text::multibyte_split(
+    *source,
+    delimiter,
+    cudf::io::text::byte_range_info{0, static_cast<int64_t>(host_input.size() - 1)});
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *out);
+}
+
 TEST_F(MultibyteSplitTest, LargeInputSparse)
 {
   auto host_input    = std::string(1024 * 1024 * 32, '.');
@@ -120,8 +149,6 @@ TEST_F(MultibyteSplitTest, LargeInput)
     host_expected.emplace_back(std::string("...:|"));
   }
 
-  host_expected.emplace_back(std::string(""));
-
   auto expected = strings_column_wrapper{host_expected.begin(), host_expected.end()};
 
   auto delimiter = std::string("...:|");
@@ -146,6 +173,52 @@ TEST_F(MultibyteSplitTest, OverlappingMatchErasure)
   // CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *out); // this use case it not yet supported.
 }
 
+TEST_F(MultibyteSplitTest, DelimiterErasure)
+{
+  auto delimiter = "\r\n";
+
+  auto host_input = std::string("line\r\nanother line\r\nthird line\r\n");
+  auto expected   = strings_column_wrapper{"line", "another line", "third line"};
+
+  cudf::io::text::parse_options options;
+  options.strip_delimiters = true;
+  auto source              = cudf::io::text::make_source(host_input);
+  auto out                 = cudf::io::text::multibyte_split(*source, delimiter, options);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *out);
+}
+
+TEST_F(MultibyteSplitTest, DelimiterErasureByteRange)
+{
+  auto delimiter = "\r\n";
+
+  auto host_input = std::string("line\r\nanother line\r\nthird line\r\n");
+  auto expected   = strings_column_wrapper{"line", "another line", "third line"};
+
+  cudf::io::text::parse_options options;
+  options.strip_delimiters = true;
+  options.byte_range       = cudf::io::text::byte_range_info(0, host_input.size() - 1);
+  auto source              = cudf::io::text::make_source(host_input);
+  auto out                 = cudf::io::text::multibyte_split(*source, delimiter, options);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *out);
+}
+
+TEST_F(MultibyteSplitTest, DelimiterErasureOverlap)
+{
+  auto delimiter = "::";
+
+  auto host_input = std::string("::a:::b::c::::d");
+  auto expected   = strings_column_wrapper{"", "a", "", "b", "c", "", "", "d"};
+
+  cudf::io::text::parse_options options;
+  options.strip_delimiters = true;
+  auto source              = cudf::io::text::make_source(host_input);
+  auto out                 = cudf::io::text::multibyte_split(*source, delimiter, options);
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *out);
+}
+
 TEST_F(MultibyteSplitTest, HandpickedInput)
 {
   auto delimiters = "::|";
@@ -184,7 +257,7 @@ TEST_F(MultibyteSplitTest, HandpickedInput)
     "ggg::|",         "hhh::|",      "___::|",       "here::|", "is::|",     "another::|",
     "simple::|",      "text::|",     "seperated::|", "by::|",   "emojis::|", "which::|",
     "are::|",         "multiple::|", "bytes::|",     "and::|",  "used::|",   "as::|",
-    "delimiters.::|", "::|",         "::|",          "::|",     ""};
+    "delimiters.::|", "::|",         "::|",          "::|"};
 
   auto source = cudf::io::text::make_source(host_input);
   auto out    = cudf::io::text::multibyte_split(*source, delimiters);
@@ -359,6 +432,21 @@ TEST_F(MultibyteSplitTest, SmallInputAllPossibleRangesSingleByte)
   }
 }
 
+TEST_F(MultibyteSplitTest, SingletonRangeAtEnd)
+{
+  // we want a delimiter at the end of the file to not create a new empty row even if it is the only
+  // character in the byte range
+  using namespace cudf::io::text;
+  auto host_input = std::string("ab:cd:");
+  auto delimiter  = std::string(":");
+  auto source     = make_source(host_input);
+  auto expected   = strings_column_wrapper{};
+
+  auto out = multibyte_split(*source, delimiter, byte_range_info{5, 1});
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *out, debug_output_level::ALL_ERRORS);
+}
+
 TEST_F(MultibyteSplitTest, EmptyInput)
 {
   using namespace cudf::io::text;
diff --git a/python/cudf/cudf/_lib/cpp/io/text.pxd b/python/cudf/cudf/_lib/cpp/io/text.pxd
index 7bbe870dad3..368b014ea4b 100644
--- a/python/cudf/cudf/_lib/cpp/io/text.pxd
+++ b/python/cudf/cudf/_lib/cpp/io/text.pxd
@@ -1,6 +1,7 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libc.stdint cimport uint64_t
+from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
 
@@ -37,9 +38,12 @@ cdef extern from "cudf/io/text/data_chunk_source_factories.hpp" \
 cdef extern from "cudf/io/text/multibyte_split.hpp" \
         namespace "cudf::io::text" nogil:
 
-    unique_ptr[column] multibyte_split(data_chunk_source source,
-                                       string delimiter) except +
+    cdef cppclass parse_options:
+        byte_range_info byte_range
+        bool strip_delimiters
+
+        parse_options() except +
 
     unique_ptr[column] multibyte_split(data_chunk_source source,
                                        string delimiter,
-                                       byte_range_info byte_range) except +
+                                       parse_options options) except +
diff --git a/python/cudf/cudf/_lib/text.pyx b/python/cudf/cudf/_lib/text.pyx
index 31a5617af58..be11132497e 100644
--- a/python/cudf/cudf/_lib/text.pyx
+++ b/python/cudf/cudf/_lib/text.pyx
@@ -19,12 +19,14 @@ from cudf._lib.cpp.io.text cimport (
     make_source_from_bgzip_file,
     make_source_from_file,
     multibyte_split,
+    parse_options,
 )
 
 
 def read_text(object filepaths_or_buffers,
               object delimiter=None,
               object byte_range=None,
+              object strip_delimiters=False,
               object compression=None,
               object compression_offsets=None):
     """
@@ -44,6 +46,7 @@ def read_text(object filepaths_or_buffers,
     cdef byte_range_info c_byte_range
     cdef uint64_t c_compression_begin_offset
     cdef uint64_t c_compression_end_offset
+    cdef parse_options c_options
 
     if compression is None:
         if isinstance(filepaths_or_buffers, TextIOBase):
@@ -71,19 +74,18 @@ def read_text(object filepaths_or_buffers,
     else:
         raise ValueError("Only bgzip compression is supported at the moment")
 
-    if (byte_range is None):
-        with nogil:
-            c_col = move(multibyte_split(dereference(datasource), delim))
-    else:
+    c_options = parse_options()
+    if byte_range is not None:
         c_byte_range_offset = byte_range[0]
         c_byte_range_size = byte_range[1]
-        c_byte_range = byte_range_info(
+        c_options.byte_range = byte_range_info(
             c_byte_range_offset,
             c_byte_range_size)
-        with nogil:
-            c_col = move(multibyte_split(
-                dereference(datasource),
-                delim,
-                c_byte_range))
+    c_options.strip_delimiters = strip_delimiters
+    with nogil:
+        c_col = move(multibyte_split(
+            dereference(datasource),
+            delim,
+            c_options))
 
     return {None: Column.from_unique_ptr(move(c_col))}
diff --git a/python/cudf/cudf/io/text.py b/python/cudf/cudf/io/text.py
index 23983f01966..f341edbf6c1 100644
--- a/python/cudf/cudf/io/text.py
+++ b/python/cudf/cudf/io/text.py
@@ -14,6 +14,7 @@ def read_text(
     filepath_or_buffer,
     delimiter=None,
     byte_range=None,
+    strip_delimiters=False,
     compression=None,
     compression_offsets=None,
     **kwargs,
@@ -35,6 +36,7 @@ def read_text(
             filepath_or_buffer,
             delimiter=delimiter,
             byte_range=byte_range,
+            strip_delimiters=strip_delimiters,
             compression=compression,
             compression_offsets=compression_offsets,
         )
diff --git a/python/cudf/cudf/tests/test_text.py b/python/cudf/cudf/tests/test_text.py
index 7f41d606473..627bf0a68bb 100644
--- a/python/cudf/cudf/tests/test_text.py
+++ b/python/cudf/cudf/tests/test_text.py
@@ -827,14 +827,20 @@ def test_read_text_byte_range(datadir):
 
 
 def test_read_text_byte_range_large(tmpdir):
-    content = str([["\n" if x % 5 == 0 else "x"] for x in range(0, 3000)])
-    delimiter = "1."
+    content = "".join(("\n" if x % 5 == 4 else "x") for x in range(0, 3000))
+    delimiter = "\n"
     temp_file = str(tmpdir) + "/temp.txt"
 
     with open(temp_file, "w") as f:
         f.write(content)
 
-    cudf.read_text(temp_file, delimiter=delimiter)
+    expected = cudf.Series(["xxxx\n" for i in range(0, 200)])
+
+    actual = cudf.read_text(
+        temp_file, delimiter=delimiter, byte_range=[1000, 1000]
+    )
+
+    assert_eq(expected, actual)
 
 
 def test_read_text_in_memory(datadir):
@@ -847,6 +853,18 @@ def test_read_text_in_memory(datadir):
     assert_eq(expected, actual)
 
 
+def test_read_text_in_memory_strip_delimiter(datadir):
+    # With strip_delimiters=True the delimiters are dropped from each row, so
+    # the rows hold only the text between delimiters (no trailing '::')
+    expected = cudf.Series(["x", "y", "z"])
+
+    actual = cudf.read_text(
+        StringIO("x::y::z"), delimiter="::", strip_delimiters=True
+    )
+
+    assert_eq(expected, actual)
+
+
 def test_read_text_bgzip(datadir):
     chess_file_compressed = str(datadir) + "/chess.pgn.gz"
     chess_file = str(datadir) + "/chess.pgn"
diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py
index 0a0647f1297..5298e470a91 100644
--- a/python/cudf/cudf/utils/ioutils.py
+++ b/python/cudf/cudf/utils/ioutils.py
@@ -1215,6 +1215,13 @@
 delimiter : string, default None
     The delimiter that should be used for splitting text chunks into
     separate cudf column rows. The delimiter may be one or more characters.
+strip_delimiters : boolean, default False
+    Unlike the `str.split()` function, `read_text` preserves the delimiter
+    at the end of a field in output by default, meaning `a;b;c` will turn into
+    `['a;','b;','c']` when using `;` as a delimiter.
+    Setting this option to `True` will strip these trailing delimiters,
+    leaving only the contents between delimiters in the resulting column:
+    `['a','b','c']`
 byte_range : list or tuple, default None
     Byte range within the input file to be read. The first number is the
     offset in bytes, the second number is the range size in bytes.

From 43eb7a07c8ed1afdf5b80d2912d6e8993e5262d9 Mon Sep 17 00:00:00 2001
From: Tobias Ribizel <ribizel@kit.edu>
Date: Thu, 27 Oct 2022 21:26:55 +0200
Subject: [PATCH 086/202] Refactor multibyte_split `output_builder` (#11945)

This PR moves the `output_builder` and `split_device_span` classes out of `multibyte_split` and adds an iterator for the `split_device_span`, enabling it to be used directly in Thrust algorithms.

I also included a fix from #11875 to make the integration easier once that is merged.

Authors:
  - Tobias Ribizel (https://github.com/upsj)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Mike Wilson (https://github.com/hyperbolic2346)

URL: https://github.com/rapidsai/cudf/pull/11945
---
 cpp/src/io/text/multibyte_split.cu         | 217 +------------
 cpp/src/io/utilities/output_builder.cuh    | 357 +++++++++++++++++++++
 cpp/tests/io/text/multibyte_split_test.cpp |  64 ++++
 3 files changed, 423 insertions(+), 215 deletions(-)
 create mode 100644 cpp/src/io/utilities/output_builder.cuh

diff --git a/cpp/src/io/text/multibyte_split.cu b/cpp/src/io/text/multibyte_split.cu
index 0d699fc72fd..1177be6b63f 100644
--- a/cpp/src/io/text/multibyte_split.cu
+++ b/cpp/src/io/text/multibyte_split.cu
@@ -14,10 +14,7 @@
  * limitations under the License.
  */
 
-// Can be removed once we use Thrust 1.16+
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wpragmas"
-#pragma GCC diagnostic ignored "-Wsizeof-array-div"
+#include <io/utilities/output_builder.cuh>
 
 #include <cudf/column/column.hpp>
 #include <cudf/column/column_factories.hpp>
@@ -48,54 +45,12 @@
 #include <cub/block/block_load.cuh>
 #include <cub/block/block_scan.cuh>
 
-#pragma GCC diagnostic pop
-
 #include <cstdint>
 #include <limits>
 #include <memory>
 #include <numeric>
 #include <optional>
 
-namespace cudf {
-
-/**
- * @brief A device span consisting of two separate device_spans acting as if they were part of a
- * single span. The first head.size() entries are served from the first span, the remaining
- * tail.size() entries are served from the second span.
- *
- * @tparam T The type of elements in the span.
- */
-template <typename T>
-class split_device_span {
- public:
-  explicit constexpr split_device_span(device_span<T> head, device_span<T> tail = {})
-    : _head{head}, _tail{tail}
-  {
-  }
-
-  [[nodiscard]] constexpr T& operator[](size_type i)
-  {
-    return i < _head.size() ? _head[i] : _tail[i - _head.size()];
-  }
-
-  [[nodiscard]] constexpr const T& operator[](size_type i) const
-  {
-    return i < _head.size() ? _head[i] : _tail[i - _head.size()];
-  }
-
-  [[nodiscard]] constexpr size_type size() const { return _head.size() + _tail.size(); }
-
-  [[nodiscard]] constexpr device_span<T> head() const { return _head; }
-
-  [[nodiscard]] constexpr device_span<T> tail() const { return _tail; }
-
- private:
-  device_span<T> _head;
-  device_span<T> _tail;
-};
-
-}  // namespace cudf
-
 namespace {
 
 using cudf::io::text::detail::multistate;
@@ -385,172 +340,6 @@ std::vector<rmm::cuda_stream_view> get_streams(int32_t count, rmm::cuda_stream_p
   return streams;
 }
 
-/**
- * @brief A chunked storage class that provides preallocated memory for algorithms with known
- * worst-case output size. It provides functionality to retrieve the next chunk to write to, for
- * reporting how much memory was actually written and for gathering all previously written outputs
- * into a single contiguous vector.
- *
- * @tparam T The output element type.
- */
-template <typename T>
-class output_builder {
- public:
-  using size_type = typename rmm::device_uvector<T>::size_type;
-
-  /**
-   * @brief Initializes an output builder with given worst-case output size and stream.
-   *
-   * @param max_write_size the maximum number of elements that will be written into a
-   *                       split_device_span returned from `next_output`.
-   * @param stream the stream used to allocate the first chunk of memory.
-   * @param mr optional, the memory resource to use for allocation.
-   */
-  output_builder(size_type max_write_size,
-                 size_type max_growth,
-                 rmm::cuda_stream_view stream,
-                 rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
-    : _size{0}, _max_write_size{max_write_size}, _max_growth{max_growth}
-  {
-    CUDF_EXPECTS(max_write_size > 0, "Internal error");
-    _chunks.emplace_back(0, stream, mr);
-    _chunks.back().reserve(max_write_size * 2, stream);
-  }
-
-  output_builder(output_builder&&)      = delete;
-  output_builder(const output_builder&) = delete;
-  output_builder& operator=(output_builder&&) = delete;
-  output_builder& operator=(const output_builder&) = delete;
-
-  /**
-   * @brief Returns the next free chunk of `max_write_size` elements from the underlying storage.
-   * Must be followed by a call to `advance_output` after the memory has been written to.
-   *
-   * @param stream The stream to allocate a new chunk of memory with, if necessary.
-   *               This should be the stream that will write to the `split_device_span`.
-   * @return A `split_device_span` starting directly after the last output and providing at least
-   *         `max_write_size` entries of storage.
-   */
-  [[nodiscard]] split_device_span<T> next_output(rmm::cuda_stream_view stream)
-  {
-    auto head_it   = _chunks.end() - (_chunks.size() > 1 and _chunks.back().is_empty() ? 2 : 1);
-    auto head_span = get_free_span(*head_it);
-    if (head_span.size() >= _max_write_size) { return split_device_span<T>{head_span}; }
-    if (head_it == _chunks.end() - 1) {
-      // insert a new vector of double size
-      auto const next_chunk_size =
-        std::min(_max_growth * _max_write_size, 2 * _chunks.back().capacity());
-      _chunks.emplace_back(0, stream, _chunks.back().memory_resource());
-      _chunks.back().reserve(next_chunk_size, stream);
-    }
-    auto tail_span = get_free_span(_chunks.back());
-    CUDF_EXPECTS(head_span.size() + tail_span.size() >= _max_write_size, "Internal error");
-    return split_device_span<T>{head_span, tail_span};
-  }
-
-  /**
-   * @brief Advances the output sizes after a `split_device_span` returned from `next_output` was
-   *        written to.
-   *
-   * @param actual_size The number of elements that were written to the result of the previous
-   *                    `next_output` call.
-   */
-  void advance_output(size_type actual_size, rmm::cuda_stream_view stream)
-  {
-    CUDF_EXPECTS(actual_size <= _max_write_size, "Internal error");
-    if (_chunks.size() < 2) {
-      auto const new_size = _chunks.back().size() + actual_size;
-      inplace_resize(_chunks.back(), new_size, stream);
-    } else {
-      auto& tail              = _chunks.back();
-      auto& prev              = _chunks.rbegin()[1];
-      auto const prev_advance = std::min(actual_size, prev.capacity() - prev.size());
-      auto const tail_advance = actual_size - prev_advance;
-      inplace_resize(prev, prev.size() + prev_advance, stream);
-      inplace_resize(tail, tail.size() + tail_advance, stream);
-    }
-    _size += actual_size;
-  }
-
-  /**
-   * @brief Returns the first element that was written to the output.
-   *        Requires a previous call to `next_output` and `advance_output` and `size() > 0`.
-   * @param stream The stream used to access the element.
-   * @return The first element that was written to the output.
-   */
-  [[nodiscard]] T front_element(rmm::cuda_stream_view stream) const
-  {
-    return _chunks.front().front_element(stream);
-  }
-
-  /**
-   * @brief Returns the last element that was written to the output.
-   *        Requires a previous call to `next_output` and `advance_output` and `size() > 0`.
-   * @param stream The stream used to access the element.
-   * @return The last element that was written to the output.
-   */
-  [[nodiscard]] T back_element(rmm::cuda_stream_view stream) const
-  {
-    auto const& last_nonempty_chunk =
-      _chunks.size() > 1 and _chunks.back().is_empty() ? _chunks.rbegin()[1] : _chunks.back();
-    return last_nonempty_chunk.back_element(stream);
-  }
-
-  [[nodiscard]] size_type size() const { return _size; }
-
-  /**
-   * @brief Gathers all previously written outputs into a single contiguous vector.
-   *
-   * @param stream The stream used to allocate and gather the output vector. All previous write
-   *               operations to the output buffer must have finished or happened on this stream.
-   * @param mr The memory resource used to allocate the output vector.
-   * @return The output vector.
-   */
-  rmm::device_uvector<T> gather(rmm::cuda_stream_view stream,
-                                rmm::mr::device_memory_resource* mr) const
-  {
-    rmm::device_uvector<T> output{size(), stream, mr};
-    auto output_it = output.begin();
-    for (auto const& chunk : _chunks) {
-      output_it = thrust::copy(
-        rmm::exec_policy_nosync(stream), chunk.begin(), chunk.begin() + chunk.size(), output_it);
-    }
-    return output;
-  }
-
- private:
-  /**
-   * @brief Resizes a vector without reallocating
-   *
-   * @param vector The vector
-   * @param new_size The new size. Must be smaller than the vector's capacity
-   */
-  static void inplace_resize(rmm::device_uvector<T>& vector,
-                             size_type new_size,
-                             rmm::cuda_stream_view stream)
-  {
-    CUDF_EXPECTS(new_size <= vector.capacity(), "Internal error");
-    vector.resize(new_size, stream);
-  }
-
-  /**
-   * @brief Returns the span consisting of all currently unused elements in the vector
-   * (`i >= size() and i < capacity()`).
-   *
-   * @param vector The vector.
-   * @return The span of unused elements.
-   */
-  static device_span<T> get_free_span(rmm::device_uvector<T>& vector)
-  {
-    return device_span<T>{vector.data() + vector.size(), vector.capacity() - vector.size()};
-  }
-
-  size_type _size;
-  size_type _max_write_size;
-  size_type _max_growth;
-  std::vector<rmm::device_uvector<T>> _chunks;
-};
-
 std::unique_ptr<cudf::column> multibyte_split(cudf::io::text::data_chunk_source const& source,
                                               std::string const& delimiter,
                                               byte_range_info byte_range,
@@ -732,9 +521,7 @@ std::unique_ptr<cudf::column> multibyte_split(cudf::io::text::data_chunk_source
         chunk->data() + std::min<byte_offset>(sentinel - chunk_offset, chunk->size());
       auto const output_size = end - begin;
       auto char_output       = char_storage.next_output(scan_stream);
-      auto const split = begin + std::min<byte_offset>(output_size, char_output.head().size());
-      thrust::copy(rmm::exec_policy_nosync(scan_stream), begin, split, char_output.head().begin());
-      thrust::copy(rmm::exec_policy_nosync(scan_stream), split, end, char_output.tail().begin());
+      thrust::copy(rmm::exec_policy_nosync(scan_stream), begin, end, char_output.begin());
       char_storage.advance_output(output_size, scan_stream);
     }
 
diff --git a/cpp/src/io/utilities/output_builder.cuh b/cpp/src/io/utilities/output_builder.cuh
new file mode 100644
index 00000000000..e45143480fc
--- /dev/null
+++ b/cpp/src/io/utilities/output_builder.cuh
@@ -0,0 +1,357 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cudf/types.hpp>
+#include <cudf/utilities/error.hpp>
+#include <cudf/utilities/span.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/exec_policy.hpp>
+#include <rmm/mr/device/device_memory_resource.hpp>
+
+#include <thrust/copy.h>
+
+#include <iterator>
+
+namespace cudf {
+
+template <typename T>
+class split_device_span_iterator;
+
+/**
+ * @brief A view that presents two separate device_spans as if they were one contiguous span.
+ * Indices `[0, head.size())` are served from the first span, the remaining
+ * tail.size() indices are served from the second span.
+ *
+ * @tparam T The type of elements in the span.
+ */
+template <typename T>
+class split_device_span {
+ public:
+  using element_type    = T;
+  using value_type      = std::remove_cv_t<T>;  // the cv-unqualified type, not the trait struct
+  using size_type       = std::size_t;
+  using difference_type = std::ptrdiff_t;
+  using pointer         = T*;
+  using iterator        = split_device_span_iterator<T>;
+  using const_pointer   = T const*;
+  using reference       = T&;
+  using const_reference = T const&;
+
+  split_device_span() = default;
+
+  explicit constexpr split_device_span(device_span<T> head, device_span<T> tail = {})
+    : _head{head}, _tail{tail}
+  {
+  }
+
+  [[nodiscard]] constexpr reference operator[](size_type i) const
+  {
+    return i < _head.size() ? _head[i] : _tail[i - _head.size()];
+  }
+
+  [[nodiscard]] constexpr size_type size() const { return _head.size() + _tail.size(); }
+
+  [[nodiscard]] constexpr device_span<T> head() const { return _head; }
+
+  [[nodiscard]] constexpr device_span<T> tail() const { return _tail; }
+
+  [[nodiscard]] constexpr iterator begin() const;
+
+  [[nodiscard]] constexpr iterator end() const;
+
+ private:
+  device_span<T> _head;
+  device_span<T> _tail;
+};
+
+/**
+ * @brief A random access iterator over a split_device_span, stored as the span plus an offset.
+ *
+ * @tparam T The type of elements in the underlying span.
+ */
+template <typename T>
+class split_device_span_iterator {
+  using it = split_device_span_iterator;
+
+ public:
+  using size_type         = std::size_t;
+  using difference_type   = std::ptrdiff_t;
+  using value_type        = T;
+  using pointer           = value_type*;
+  using reference         = value_type&;
+  using iterator_category = std::random_access_iterator_tag;
+
+  split_device_span_iterator() = default;
+
+  constexpr split_device_span_iterator(split_device_span<T> span, size_type offset)
+    : _span{span}, _offset{offset}
+  {
+  }
+
+  [[nodiscard]] constexpr reference operator*() const { return _span[_offset]; }
+
+  [[nodiscard]] constexpr reference operator[](size_type i) const { return _span[_offset + i]; }
+
+  [[nodiscard]] constexpr friend bool operator==(const it& lhs, const it& rhs)
+  {
+    return lhs._offset == rhs._offset;  // only the offsets are compared, not the spans
+  }
+
+  [[nodiscard]] constexpr friend bool operator!=(const it& lhs, const it& rhs)
+  {
+    return !(lhs == rhs);
+  }
+  [[nodiscard]] constexpr friend bool operator<(const it& lhs, const it& rhs)
+  {
+    return lhs._offset < rhs._offset;  // ordering is by offset only as well
+  }
+
+  [[nodiscard]] constexpr friend bool operator>=(const it& lhs, const it& rhs)
+  {
+    return !(lhs < rhs);
+  }
+
+  [[nodiscard]] constexpr friend bool operator>(const it& lhs, const it& rhs) { return rhs < lhs; }
+
+  [[nodiscard]] constexpr friend bool operator<=(const it& lhs, const it& rhs)
+  {
+    return !(lhs > rhs);
+  }
+
+  [[nodiscard]] constexpr friend difference_type operator-(const it& lhs, const it& rhs)
+  {
+    return lhs._offset - rhs._offset;  // unsigned difference, converted to difference_type
+  }
+
+  [[nodiscard]] constexpr friend it operator+(it lhs, difference_type i) { return lhs += i; }
+
+  constexpr it& operator+=(difference_type i)
+  {
+    _offset += i;  // a negative i is converted to size_type and wraps, moving backwards
+    return *this;
+  }
+
+  constexpr it& operator-=(difference_type i) { return *this += -i; }
+
+  constexpr it& operator++() { return *this += 1; }
+
+  constexpr it& operator--() { return *this -= 1; }
+
+  constexpr it operator++(int)
+  {
+    auto result = *this;
+    ++*this;
+    return result;
+  }
+
+  constexpr it operator--(int)
+  {
+    auto result = *this;
+    --*this;
+    return result;
+  }
+
+ private:
+  split_device_span<T> _span;
+  size_type _offset;
+};
+
+template <typename T>
+[[nodiscard]] constexpr split_device_span_iterator<T> split_device_span<T>::begin() const
+{
+  return {*this, 0};
+}
+
+template <typename T>
+[[nodiscard]] constexpr split_device_span_iterator<T> split_device_span<T>::end() const
+{
+  return {*this, size()};
+}
+
+/**
+ * @brief A chunked storage class that provides preallocated memory for algorithms with known
+ * worst-case output size. It provides functionality to retrieve the next chunk to write to, for
+ * reporting how much memory was actually written and for gathering all previously written outputs
+ * into a single contiguous vector.
+ *
+ * @tparam T The output element type.
+ */
+template <typename T>
+class output_builder {
+ public:
+  using size_type = typename rmm::device_uvector<T>::size_type;
+
+  /**
+   * @brief Initializes an output builder with given worst-case output size and stream.
+   *
+   * @param max_write_size the maximum number of elements written to a span from `next_output`.
+   * @param max_growth the maximum capacity of a new chunk, as a multiple of `max_write_size`.
+   * @param stream the stream used to allocate the first chunk of memory.
+   * @param mr optional, the memory resource to use for allocation.
+   */
+  output_builder(size_type max_write_size,
+                 size_type max_growth,
+                 rmm::cuda_stream_view stream,
+                 rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+    : _size{0}, _max_write_size{max_write_size}, _max_growth{max_growth}
+  {
+    CUDF_EXPECTS(max_write_size > 0, "Internal error");
+    _chunks.emplace_back(0, stream, mr);
+    _chunks.back().reserve(max_write_size * 2, stream);
+  }
+
+  output_builder(output_builder&&)      = delete;
+  output_builder(const output_builder&) = delete;
+  output_builder& operator=(output_builder&&) = delete;
+  output_builder& operator=(const output_builder&) = delete;
+
+  /**
+   * @brief Returns the next free chunk of `max_write_size` elements from the underlying storage.
+   * Must be followed by a call to `advance_output` after the memory has been written to.
+   *
+   * @param stream The stream to allocate a new chunk of memory with, if necessary.
+   *               This should be the stream that will write to the `split_device_span`.
+   * @return A `split_device_span` starting directly after the last output and providing at least
+   *         `max_write_size` entries of storage.
+   */
+  [[nodiscard]] split_device_span<T> next_output(rmm::cuda_stream_view stream)
+  {
+    auto head_it   = _chunks.end() - (_chunks.size() > 1 and _chunks.back().is_empty() ? 2 : 1);
+    auto head_span = get_free_span(*head_it);
+    if (head_span.size() >= _max_write_size) { return split_device_span<T>{head_span}; }
+    if (head_it == _chunks.end() - 1) {
+      // append a chunk with twice the capacity, capped at _max_growth * _max_write_size
+      auto const next_chunk_size =
+        std::min(_max_growth * _max_write_size, 2 * _chunks.back().capacity());
+      _chunks.emplace_back(0, stream, _chunks.back().memory_resource());
+      _chunks.back().reserve(next_chunk_size, stream);
+    }
+    auto tail_span = get_free_span(_chunks.back());
+    CUDF_EXPECTS(head_span.size() + tail_span.size() >= _max_write_size, "Internal error");
+    return split_device_span<T>{head_span, tail_span};
+  }
+
+  /**
+   * @brief Advances the output sizes after a `split_device_span` returned from `next_output` was
+   *        written to.
+   *
+   * @param actual_size The number of elements that were written to the result of the previous
+   *                    `next_output` call.
+   * @param stream The stream on which to resize the vectors. Since this function will not
+   *               reallocate, this only changes the stream of the internally stored vectors,
+   *               impacting their subsequent copy and destruction behavior.
+   */
+  void advance_output(size_type actual_size, rmm::cuda_stream_view stream)
+  {
+    CUDF_EXPECTS(actual_size <= _max_write_size, "Internal error");
+    if (_chunks.size() < 2) {
+      auto const new_size = _chunks.back().size() + actual_size;
+      inplace_resize(_chunks.back(), new_size, stream);
+    } else {
+      auto& tail              = _chunks.back();
+      auto& prev              = _chunks.rbegin()[1];
+      auto const prev_advance = std::min(actual_size, prev.capacity() - prev.size());
+      auto const tail_advance = actual_size - prev_advance;
+      inplace_resize(prev, prev.size() + prev_advance, stream);
+      inplace_resize(tail, tail.size() + tail_advance, stream);
+    }
+    _size += actual_size;
+  }
+
+  /**
+   * @brief Returns the first element that was written to the output.
+   *        Requires a previous call to `next_output` and `advance_output` and `size() > 0`.
+   * @param stream The stream used to access the element.
+   * @return The first element that was written to the output.
+   */
+  [[nodiscard]] T front_element(rmm::cuda_stream_view stream) const
+  {
+    return _chunks.front().front_element(stream);
+  }
+
+  /**
+   * @brief Returns the last element that was written to the output.
+   *        Requires a previous call to `next_output` and `advance_output` and `size() > 0`.
+   * @param stream The stream used to access the element.
+   * @return The last element that was written to the output.
+   */
+  [[nodiscard]] T back_element(rmm::cuda_stream_view stream) const
+  {
+    auto const& last_nonempty_chunk =
+      _chunks.size() > 1 and _chunks.back().is_empty() ? _chunks.rbegin()[1] : _chunks.back();
+    return last_nonempty_chunk.back_element(stream);
+  }
+
+  [[nodiscard]] size_type size() const { return _size; }
+
+  /**
+   * @brief Gathers all previously written outputs into a single contiguous vector.
+   *
+   * @param stream The stream used to allocate and gather the output vector. All previous write
+   *               operations to the output buffer must have finished or happened on this stream.
+   * @param mr The memory resource used to allocate the output vector.
+   * @return The output vector.
+   */
+  rmm::device_uvector<T> gather(rmm::cuda_stream_view stream,
+                                rmm::mr::device_memory_resource* mr) const
+  {
+    rmm::device_uvector<T> output{size(), stream, mr};
+    auto output_it = output.begin();
+    for (auto const& chunk : _chunks) {
+      output_it = thrust::copy(
+        rmm::exec_policy_nosync(stream), chunk.begin(), chunk.begin() + chunk.size(), output_it);
+    }
+    return output;
+  }
+
+ private:
+  /**
+   * @brief Resizes a vector without reallocating
+   *
+   * @param vector The vector
+   * @param new_size The new size. Must not exceed the vector's capacity
+   * @param stream The stream on which to resize the vector. Since this function will not
+   *               reallocate, this only changes the stream of `vector`, impacting its subsequent
+   *               copy and destruction behavior.
+   */
+  static void inplace_resize(rmm::device_uvector<T>& vector,
+                             size_type new_size,
+                             rmm::cuda_stream_view stream)
+  {
+    CUDF_EXPECTS(new_size <= vector.capacity(), "Internal error");
+    vector.resize(new_size, stream);
+  }
+
+  /**
+   * @brief Returns the span consisting of all currently unused elements in the vector
+   *        (`i >= size() and i < capacity()`).
+   *
+   * @param vector The vector.
+   * @return The span of unused elements.
+   */
+  static device_span<T> get_free_span(rmm::device_uvector<T>& vector)
+  {
+    return device_span<T>{vector.data() + vector.size(), vector.capacity() - vector.size()};
+  }
+
+  size_type _size;
+  size_type _max_write_size;
+  size_type _max_growth;
+  std::vector<rmm::device_uvector<T>> _chunks;
+};
+
+}  // namespace cudf
diff --git a/cpp/tests/io/text/multibyte_split_test.cpp b/cpp/tests/io/text/multibyte_split_test.cpp
index 2da7073b334..2783b006982 100644
--- a/cpp/tests/io/text/multibyte_split_test.cpp
+++ b/cpp/tests/io/text/multibyte_split_test.cpp
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+#include <io/utilities/output_builder.cuh>
+
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
@@ -26,6 +28,7 @@
 #include <cudf/io/text/data_chunk_source_factories.hpp>
 #include <cudf/io/text/multibyte_split.hpp>
 #include <cudf/strings/strings_column_view.hpp>
+#include <cudf/utilities/default_stream.hpp>
 
 using namespace cudf;
 using namespace test;
@@ -499,4 +502,65 @@ TEST_F(MultibyteSplitTest, EmptyRangeSingleByte)
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *out, debug_output_level::ALL_ERRORS);
 }
 
+TEST_F(MultibyteSplitTest, EmptySplitDeviceSpan)
+{
+  cudf::split_device_span<int> span;
+  ASSERT_EQ(span.size(), 0);
+  ASSERT_EQ(span.head().size(), 0);
+  ASSERT_EQ(span.head().data(), nullptr);
+  ASSERT_EQ(span.tail().size(), 0);
+  ASSERT_EQ(span.tail().data(), nullptr);
+}
+
+TEST_F(MultibyteSplitTest, SplitDeviceSpan)
+{
+  int i = 0;
+  int j = 1;
+  cudf::split_device_span<int> span{{&i, 1}, {&j, 1}};
+  ASSERT_EQ(span.size(), 2);
+  ASSERT_EQ(span.head().size(), 1);
+  ASSERT_EQ(span.head().data(), &i);
+  ASSERT_EQ(span.tail().size(), 1);
+  ASSERT_EQ(span.tail().data(), &j);
+  ASSERT_EQ(&span[0], &i);
+  ASSERT_EQ(&span[1], &j);
+  ASSERT_EQ(&*span.begin(), &i);
+  ASSERT_EQ(&*(span.begin() + 1), &j);
+  ASSERT_NE(span.begin() + 1, span.end());
+  ASSERT_EQ(span.begin() + 2, span.end());
+}
+
+TEST_F(MultibyteSplitTest, OutputBuilder)
+{
+  auto const stream = cudf::get_default_stream();
+  cudf::output_builder<char> builder{10, 4, stream};
+  auto const output = builder.next_output(stream);
+  ASSERT_GE(output.size(), 10);
+  ASSERT_EQ(output.tail().size(), 0);
+  ASSERT_EQ(output.tail().data(), nullptr);
+  ASSERT_EQ(builder.size(), 0);
+  builder.advance_output(1, stream);
+  ASSERT_EQ(builder.size(), 1);
+  auto const output2 = builder.next_output(stream);
+  ASSERT_EQ(output2.head().data(), output.head().data() + 1);
+  builder.advance_output(10, stream);
+  ASSERT_EQ(builder.size(), 11);
+  auto const output3 = builder.next_output(stream);
+  ASSERT_EQ(output3.head().size(), 9);
+  ASSERT_EQ(output3.head().data(), output.head().data() + 11);
+  ASSERT_EQ(output3.tail().size(), 40);
+  builder.advance_output(9, stream);
+  ASSERT_EQ(builder.size(), 20);
+  auto const output4 = builder.next_output(stream);
+  ASSERT_EQ(output4.head().size(), 0);
+  ASSERT_EQ(output4.tail().size(), output3.tail().size());
+  ASSERT_EQ(output4.tail().data(), output3.tail().data());
+  builder.advance_output(1, stream);
+  auto const output5 = builder.next_output(stream);
+  ASSERT_EQ(output5.head().size(), 39);
+  ASSERT_EQ(output5.head().data(), output4.tail().data() + 1);
+  ASSERT_EQ(output5.tail().size(), 0);
+  ASSERT_EQ(output5.tail().data(), nullptr);
+}
+
 CUDF_TEST_PROGRAM_MAIN()

From bac20048c488827747ba7ba9c596af9f38aceff7 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 27 Oct 2022 15:03:48 -0500
Subject: [PATCH 087/202] Add pivot_table and crosstab to docs. (#12014)

This PR resolves #12012 by adding `cudf.pivot_table` and `cudf.crosstab` to the documentation.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Ashwin Srinath (https://github.com/shwina)

URL: https://github.com/rapidsai/cudf/pull/12014
---
 docs/cudf/source/api_docs/general_functions.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docs/cudf/source/api_docs/general_functions.rst b/docs/cudf/source/api_docs/general_functions.rst
index 272d95e84bc..40e1b766dc9 100644
--- a/docs/cudf/source/api_docs/general_functions.rst
+++ b/docs/cudf/source/api_docs/general_functions.rst
@@ -14,6 +14,8 @@ Data manipulations
    cudf.get_dummies
    cudf.melt
    cudf.pivot
+   cudf.pivot_table
+   cudf.crosstab
    cudf.unstack
 
 Top-level conversions

From 1b1ca7c9005c32bd86d8cd80ad076e1ed345db8b Mon Sep 17 00:00:00 2001
From: Tobias Ribizel <ribizel@kit.edu>
Date: Thu, 27 Oct 2022 22:20:08 +0200
Subject: [PATCH 088/202] Provide `data_chunk_source` wrapper for `datasource`
 (#11886)

With `datasource` being more generic in its interface than `data_chunk_source`, this PR adds a wrapper that exposes a `datasource` as a `data_chunk_source` for use in `multibyte_split`. Its host read implementation is based on the file `data_chunk_source`.

Authors:
  - Tobias Ribizel (https://github.com/upsj)

Approvers:
  - Mike Wilson (https://github.com/hyperbolic2346)
  - Karthikeyan (https://github.com/karthikeyann)

URL: https://github.com/rapidsai/cudf/pull/11886
---
 cpp/benchmarks/io/text/multibyte_split.cpp    |  23 ++--
 .../io/text/data_chunk_source_factories.hpp   |   9 ++
 .../io/text/data_chunk_source_factories.cpp   | 116 ++++++++++++++++--
 cpp/tests/io/text/data_chunk_source_test.cpp  |  29 +++++
 4 files changed, 161 insertions(+), 16 deletions(-)

diff --git a/cpp/benchmarks/io/text/multibyte_split.cpp b/cpp/benchmarks/io/text/multibyte_split.cpp
index c0e82b34623..56ac4d4ab73 100644
--- a/cpp/benchmarks/io/text/multibyte_split.cpp
+++ b/cpp/benchmarks/io/text/multibyte_split.cpp
@@ -45,7 +45,7 @@
 
 temp_directory const temp_dir("cudf_nvbench");
 
-enum class data_chunk_source_type { device, file, host, host_pinned, file_bgzip };
+enum class data_chunk_source_type { device, file, file_datasource, host, host_pinned, file_bgzip };
 
 NVBENCH_DECLARE_ENUM_TYPE_STRINGS(
   data_chunk_source_type,
@@ -53,6 +53,7 @@ NVBENCH_DECLARE_ENUM_TYPE_STRINGS(
     switch (value) {
       case data_chunk_source_type::device: return "device";
       case data_chunk_source_type::file: return "file";
+      case data_chunk_source_type::file_datasource: return "file_datasource";
       case data_chunk_source_type::host: return "host";
       case data_chunk_source_type::host_pinned: return "host_pinned";
       case data_chunk_source_type::file_bgzip: return "file_bgzip";
@@ -134,13 +135,14 @@ static void bench_multibyte_split(nvbench::state& state,
   std::iota(delim.begin(), delim.end(), '1');
 
   auto const delim_factor = static_cast<double>(delim_percent) / 100;
-  auto device_input       = create_random_input(file_size_approx, delim_factor, 0.05, delim);
-  auto host_input         = std::vector<char>{};
+  std::unique_ptr<cudf::io::datasource> datasource;
+  auto device_input = create_random_input(file_size_approx, delim_factor, 0.05, delim);
+  auto host_input   = std::vector<char>{};
   auto host_pinned_input =
     thrust::host_vector<char, thrust::system::cuda::experimental::pinned_allocator<char>>{};
 
-  if (source_type == data_chunk_source_type::host || source_type == data_chunk_source_type::file ||
-      source_type == data_chunk_source_type::file_bgzip) {
+  if (source_type != data_chunk_source_type::device &&
+      source_type != data_chunk_source_type::host_pinned) {
     host_input = cudf::detail::make_std_vector_sync<char>(
       {device_input.data(), static_cast<std::size_t>(device_input.size())},
       cudf::get_default_stream());
@@ -155,11 +157,17 @@ static void bench_multibyte_split(nvbench::state& state,
 
   auto source = [&] {
     switch (source_type) {
-      case data_chunk_source_type::file: {
+      case data_chunk_source_type::file:
+      case data_chunk_source_type::file_datasource: {
         auto const temp_file_name = random_file_in_dir(temp_dir.path());
         std::ofstream(temp_file_name, std::ofstream::out)
           .write(host_input.data(), host_input.size());
-        return cudf::io::text::make_source_from_file(temp_file_name);
+        if (source_type == data_chunk_source_type::file) {
+          return cudf::io::text::make_source_from_file(temp_file_name);
+        } else {
+          datasource = cudf::io::datasource::create(temp_file_name);
+          return cudf::io::text::make_source(*datasource);
+        }
       }
       case data_chunk_source_type::host:  //
         return cudf::io::text::make_source(host_input);
@@ -199,6 +207,7 @@ static void bench_multibyte_split(nvbench::state& state,
 
 using source_type_list = nvbench::enum_type_list<data_chunk_source_type::device,
                                                  data_chunk_source_type::file,
+                                                 data_chunk_source_type::file_datasource,
                                                  data_chunk_source_type::host,
                                                  data_chunk_source_type::host_pinned,
                                                  data_chunk_source_type::file_bgzip>;
diff --git a/cpp/include/cudf/io/text/data_chunk_source_factories.hpp b/cpp/include/cudf/io/text/data_chunk_source_factories.hpp
index 6f94fb170a8..f5230863f17 100644
--- a/cpp/include/cudf/io/text/data_chunk_source_factories.hpp
+++ b/cpp/include/cudf/io/text/data_chunk_source_factories.hpp
@@ -16,6 +16,7 @@
 
 #pragma once
 
+#include <cudf/io/datasource.hpp>
 #include <cudf/io/text/data_chunk_source.hpp>
 #include <cudf/scalar/scalar.hpp>
 #include <cudf/utilities/span.hpp>
@@ -25,6 +26,14 @@
 
 namespace cudf::io::text {
 
+/**
+ * @brief Creates a data source capable of producing device-buffered views of a datasource.
+ * @param data the datasource to be exposed as a data chunk source
+ * @return the data chunk source for the provided datasource. It must not outlive the datasource
+ *         used to construct it.
+ */
+std::unique_ptr<data_chunk_source> make_source(datasource& data);
+
 /**
  * @brief Creates a data source capable of producing device-buffered views of the given string.
  * @param data the host data to be exposed as a data chunk source. Its lifetime must be at least as
diff --git a/cpp/src/io/text/data_chunk_source_factories.cpp b/cpp/src/io/text/data_chunk_source_factories.cpp
index 9a549951d66..b910037c5d2 100644
--- a/cpp/src/io/text/data_chunk_source_factories.cpp
+++ b/cpp/src/io/text/data_chunk_source_factories.cpp
@@ -30,6 +30,86 @@ namespace cudf::io::text {
 
 namespace {
 
+/**
+ * @brief A reader which produces owning chunks of device memory which contain a copy of the data
+ * from a datasource.
+ */
+class datasource_chunk_reader : public data_chunk_reader {
+  struct host_ticket {
+    cudaEvent_t event;
+    thrust::host_vector<char, thrust::system::cuda::experimental::pinned_allocator<char>> buffer;
+  };
+
+  constexpr static int num_tickets = 2;
+
+ public:
+  datasource_chunk_reader(datasource* source) : _source(source)
+  {
+    // create one event per ticket to track completion of its last host-to-device copy.
+    for (auto& ticket : _tickets) {
+      CUDF_CUDA_TRY(cudaEventCreate(&(ticket.event)));
+    }
+  }
+
+  ~datasource_chunk_reader() override
+  {
+    for (auto& ticket : _tickets) {
+      CUDF_CUDA_TRY(cudaEventDestroy(ticket.event));
+    }
+  }
+
+  void skip_bytes(std::size_t size) override
+  {
+    _offset += std::min(_source->size() - _offset, size);
+  };
+
+  std::unique_ptr<device_data_chunk> get_next_chunk(std::size_t read_size,
+                                                    rmm::cuda_stream_view stream) override
+  {
+    CUDF_FUNC_RANGE();
+
+    read_size = std::min(_source->size() - _offset, read_size);
+
+    // get a device buffer containing read data on the device.
+    auto chunk = rmm::device_uvector<char>(read_size, stream);
+
+    if (_source->supports_device_read() && _source->is_device_read_preferred(read_size)) {
+      _source->device_read_async(
+        _offset, read_size, reinterpret_cast<uint8_t*>(chunk.data()), stream);
+    } else {
+      auto& h_ticket = _tickets[_next_ticket_idx];
+
+      _next_ticket_idx = (_next_ticket_idx + 1) % num_tickets;
+
+      // synchronize on the last host-to-device copy, so we don't clobber the host buffer.
+      CUDF_CUDA_TRY(cudaEventSynchronize(h_ticket.event));
+
+      // resize the host buffer as necessary to contain the requested number of bytes
+      if (h_ticket.buffer.size() < read_size) { h_ticket.buffer.resize(read_size); }
+
+      _source->host_read(_offset, read_size, reinterpret_cast<uint8_t*>(h_ticket.buffer.data()));
+
+      // copy the host-pinned data on to device
+      CUDF_CUDA_TRY(cudaMemcpyAsync(
+        chunk.data(), h_ticket.buffer.data(), read_size, cudaMemcpyHostToDevice, stream.value()));
+
+      // record the host-to-device copy.
+      CUDF_CUDA_TRY(cudaEventRecord(h_ticket.event, stream.value()));
+    }
+
+    _offset += read_size;
+
+    // return the device buffer so it can be processed.
+    return std::make_unique<device_uvector_data_chunk>(std::move(chunk));
+  }
+
+ private:
+  std::size_t _offset          = 0;
+  std::size_t _next_ticket_idx = 0;
+  std::array<host_ticket, num_tickets> _tickets{};
+  datasource* _source;
+};
+
 /**
  * @brief A reader which produces owning chunks of device memory which contain a copy of the data
  * from an istream.
@@ -40,9 +120,11 @@ class istream_data_chunk_reader : public data_chunk_reader {
     thrust::host_vector<char, thrust::system::cuda::experimental::pinned_allocator<char>> buffer;
   };
 
+  constexpr static int num_tickets = 2;
+
  public:
   istream_data_chunk_reader(std::unique_ptr<std::istream> datastream)
-    : _datastream(std::move(datastream)), _tickets(2)
+    : _datastream(std::move(datastream))
   {
     // create an event to track the completion of the last device-to-host copy.
     for (auto& ticket : _tickets) {
@@ -66,7 +148,7 @@ class istream_data_chunk_reader : public data_chunk_reader {
 
     auto& h_ticket = _tickets[_next_ticket_idx];
 
-    _next_ticket_idx = (_next_ticket_idx + 1) % _tickets.size();
+    _next_ticket_idx = (_next_ticket_idx + 1) % num_tickets;
 
     // synchronize on the last host-to-device copy, so we don't clobber the host buffer.
     CUDF_CUDA_TRY(cudaEventSynchronize(h_ticket.event));
@@ -84,12 +166,8 @@ class istream_data_chunk_reader : public data_chunk_reader {
     auto chunk = rmm::device_uvector<char>(read_size, stream);
 
     // copy the host-pinned data on to device
-    CUDF_CUDA_TRY(cudaMemcpyAsync(  //
-      chunk.data(),
-      h_ticket.buffer.data(),
-      read_size,
-      cudaMemcpyHostToDevice,
-      stream.value()));
+    CUDF_CUDA_TRY(cudaMemcpyAsync(
+      chunk.data(), h_ticket.buffer.data(), read_size, cudaMemcpyHostToDevice, stream.value()));
 
     // record the host-to-device copy.
     CUDF_CUDA_TRY(cudaEventRecord(h_ticket.event, stream.value()));
@@ -100,8 +178,8 @@ class istream_data_chunk_reader : public data_chunk_reader {
 
  private:
   std::size_t _next_ticket_idx = 0;
+  std::array<host_ticket, num_tickets> _tickets{};
   std::unique_ptr<std::istream> _datastream;
-  std::vector<host_ticket> _tickets;
 };
 
 /**
@@ -180,6 +258,21 @@ class device_span_data_chunk_reader : public data_chunk_reader {
   uint64_t _position = 0;
 };
 
+/**
+ * @brief A datasource-based data chunk source which creates a datasource_chunk_reader.
+ */
+class datasource_chunk_source : public data_chunk_source {
+ public:
+  datasource_chunk_source(datasource& source) : _source(&source) {}
+  [[nodiscard]] std::unique_ptr<data_chunk_reader> create_reader() const override
+  {
+    return std::make_unique<datasource_chunk_reader>(_source);
+  }
+
+ private:
+  datasource* _source;
+};
+
 /**
  * @brief A file data source which creates an istream_data_chunk_reader.
  */
@@ -228,6 +321,11 @@ class device_span_data_chunk_source : public data_chunk_source {
 
 }  // namespace
 
+std::unique_ptr<data_chunk_source> make_source(datasource& data)
+{
+  return std::make_unique<datasource_chunk_source>(data);
+}
+
 std::unique_ptr<data_chunk_source> make_source(host_span<const char> data)
 {
   return std::make_unique<host_span_data_chunk_source>(data);
diff --git a/cpp/tests/io/text/data_chunk_source_test.cpp b/cpp/tests/io/text/data_chunk_source_test.cpp
index a3314c440a4..bbace9a5d49 100644
--- a/cpp/tests/io/text/data_chunk_source_test.cpp
+++ b/cpp/tests/io/text/data_chunk_source_test.cpp
@@ -96,6 +96,35 @@ void test_source(const std::string& content, const cudf::io::text::data_chunk_so
   }
 }
 
+TEST_F(DataChunkSourceTest, DataSourceHost)
+{
+  std::string const content = "host buffer source";
+  auto const datasource =
+    cudf::io::datasource::create(cudf::io::host_buffer{content.data(), content.size()});
+  auto const source = cudf::io::text::make_source(*datasource);
+
+  test_source(content, *source);
+}
+
+TEST_F(DataChunkSourceTest, DataSourceFile)
+{
+  std::string content = "file datasource";
+  // make it big enough to have is_device_read_preferred return true
+  content.reserve(content.size() << 20);
+  for (int i = 0; i < 20; i++) {
+    content += content;
+  }
+  auto const filename = temp_env->get_temp_filepath("file_source");
+  {
+    std::ofstream file{filename};
+    file << content;
+  }
+  auto const datasource = cudf::io::datasource::create(filename);
+  auto const source     = cudf::io::text::make_source(*datasource);
+
+  test_source(content, *source);
+}
+
 TEST_F(DataChunkSourceTest, Device)
 {
   std::string const content = "device buffer source";

From f17ea943d9d78735631f4fadcfc16ba2e8073dc3 Mon Sep 17 00:00:00 2001
From: Erik Welch <erik.n.welch@gmail.com>
Date: Thu, 27 Oct 2022 17:53:48 -0500
Subject: [PATCH 089/202] Fix bug where `df.loc` resulting in single row could
 give wrong index (#11998)

Fixes #11930

I can't figure out the purpose of these lines, so let's try removing them and run CI.

I haven't followed git blame back far enough to know the full story of these lines, but they originate at least three years ago:
https://github.com/rapidsai/cudf/pull/2208/files#diff-5f58cf9dfe537ce53c6481f690ba66ff10807da04ad82df1c79c6d112d19c08b

Authors:
  - Erik Welch (https://github.com/eriknw)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Lawrence Mitchell (https://github.com/wence-)

URL: https://github.com/rapidsai/cudf/pull/11998
---
 python/cudf/cudf/core/dataframe.py            | 19 +------------------
 python/cudf/cudf/tests/test_indexing.py       |  7 +++++++
 python/dask_cudf/dask_cudf/tests/test_core.py |  9 +++++++++
 3 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 126da0f883a..a3dd82d060e 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -298,24 +298,7 @@ def _getitem_tuple_arg(self, arg):
                     if len(df) == 0:
                         raise KeyError(arg)
 
-        # Step 3: Gather index
-        if df.shape[0] == 1:  # we have a single row
-            if isinstance(arg[0], slice):
-                start = arg[0].start
-                if start is None:
-                    start = self._frame.index[0]
-                df.index = as_index(start, name=self._frame.index.name)
-            else:
-                row_selection = as_column(arg[0])
-                if is_bool_dtype(row_selection.dtype):
-                    df.index = self._frame.index._apply_boolean_mask(
-                        row_selection
-                    )
-                else:
-                    df.index = as_index(
-                        row_selection, name=self._frame.index.name
-                    )
-        # Step 4: Downcast
+        # Step 3: Downcast
         if self._can_downcast_to_series(df, arg):
             return self._downcast_to_series(df, arg)
         return df
diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py
index d726ba16e86..b4143f9e00a 100644
--- a/python/cudf/cudf/tests/test_indexing.py
+++ b/python/cudf/cudf/tests/test_indexing.py
@@ -1696,3 +1696,10 @@ def test_iloc_single_row_with_nullable_column():
 
     df.iloc[0]  # before the fix for #11349 this would segfault
     assert_eq(pdf.iloc[0], df.iloc[0])
+
+
+def test_loc_single_row_from_slice():
+    # see https://github.com/rapidsai/cudf/issues/11930
+    pdf = pd.DataFrame({"a": [10, 20, 30], "b": [1, 2, 3]}).set_index("a")
+    df = cudf.from_pandas(pdf)
+    assert_eq(pdf.loc[5:10], df.loc[5:10])
diff --git a/python/dask_cudf/dask_cudf/tests/test_core.py b/python/dask_cudf/dask_cudf/tests/test_core.py
index f7c46466705..82fd9b86ed5 100644
--- a/python/dask_cudf/dask_cudf/tests/test_core.py
+++ b/python/dask_cudf/dask_cudf/tests/test_core.py
@@ -490,6 +490,15 @@ def test_repartition_hash(by, npartitions, max_branch):
     dd.assert_eq(got_unique, expect_unique, check_index=False)
 
 
+def test_repartition_no_extra_row():
+    # see https://github.com/rapidsai/cudf/issues/11930
+    gdf = cudf.DataFrame({"a": [10, 20, 30], "b": [1, 2, 3]}).set_index("a")
+    ddf = dgd.from_cudf(gdf, npartitions=1)
+    ddf_new = ddf.repartition([0, 5, 10, 30], force=True)
+    dd.assert_eq(ddf, ddf_new)
+    dd.assert_eq(gdf, ddf_new)
+
+
 @pytest.fixture
 def pdf():
     return pd.DataFrame(

From 69fac8a2ca9110b906a0fda14f48553a66e6459a Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 27 Oct 2022 15:57:25 -0700
Subject: [PATCH 090/202] Remove unused `managed_allocator` (#12005)

The `managed_allocator` class is not used anywhere. All uses of cuco maps or the `concurrent_unordered_map` just use the `default_allocator`.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Yunsong Wang (https://github.com/PointKernel)

URL: https://github.com/rapidsai/cudf/pull/12005
---
 cpp/src/hash/hash_allocator.cuh | 36 ---------------------------------
 1 file changed, 36 deletions(-)

diff --git a/cpp/src/hash/hash_allocator.cuh b/cpp/src/hash/hash_allocator.cuh
index b3d2556d392..709b72d4fd2 100644
--- a/cpp/src/hash/hash_allocator.cuh
+++ b/cpp/src/hash/hash_allocator.cuh
@@ -26,42 +26,6 @@
 #include <rmm/mr/device/managed_memory_resource.hpp>
 #include <rmm/mr/device/per_device_resource.hpp>
 
-template <class T>
-struct managed_allocator {
-  using value_type                    = T;
-  rmm::mr::device_memory_resource* mr = new rmm::mr::managed_memory_resource;
-
-  managed_allocator() = default;
-
-  template <class U>
-  constexpr managed_allocator(const managed_allocator<U>&) noexcept
-  {
-  }
-
-  T* allocate(std::size_t n, rmm::cuda_stream_view stream = cudf::get_default_stream()) const
-  {
-    return static_cast<T*>(mr->allocate(n * sizeof(T), stream));
-  }
-
-  void deallocate(T* p,
-                  std::size_t n,
-                  rmm::cuda_stream_view stream = cudf::get_default_stream()) const
-  {
-    mr->deallocate(p, n * sizeof(T), stream);
-  }
-};
-
-template <class T, class U>
-bool operator==(const managed_allocator<T>&, const managed_allocator<U>&)
-{
-  return true;
-}
-template <class T, class U>
-bool operator!=(const managed_allocator<T>&, const managed_allocator<U>&)
-{
-  return false;
-}
-
 template <class T>
 struct default_allocator {
   using value_type                    = T;

From 1017045f46c44d205d6294bbb95e7bade1415e9c Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Fri, 28 Oct 2022 04:33:49 -0700
Subject: [PATCH 091/202] Add DataFrame.pivot_table. (#12015)

This PR adds the method `DataFrame.pivot_table` to enhance pandas API compatibility. It uses the exact same arguments as `cudf.pivot_table` but automatically supplies the first argument (a DataFrame).

Related: #11314

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Matthew Roeschke (https://github.com/mroeschke)
  - Ashwin Srinath (https://github.com/shwina)

URL: https://github.com/rapidsai/cudf/pull/12015
---
 docs/cudf/source/api_docs/dataframe.rst |  1 +
 python/cudf/cudf/core/dataframe.py      | 30 ++++++++++++++++++++-
 python/cudf/cudf/tests/test_reshape.py  | 36 +++++++++++++++++++++++++
 3 files changed, 66 insertions(+), 1 deletion(-)

diff --git a/docs/cudf/source/api_docs/dataframe.rst b/docs/cudf/source/api_docs/dataframe.rst
index bd868e85cc7..f5c9053ec92 100644
--- a/docs/cudf/source/api_docs/dataframe.rst
+++ b/docs/cudf/source/api_docs/dataframe.rst
@@ -210,6 +210,7 @@ Reshaping, sorting, transposing
    DataFrame.interleave_columns
    DataFrame.partition_by_hash
    DataFrame.pivot
+   DataFrame.pivot_table
    DataFrame.scatter_by_map
    DataFrame.sort_values
    DataFrame.sort_index
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index a3dd82d060e..02c5542a88a 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -6407,11 +6407,39 @@ def append(
     @_cudf_nvtx_annotate
     @copy_docstring(reshape.pivot)
     def pivot(self, index, columns, values=None):
-
         return cudf.core.reshape.pivot(
             self, index=index, columns=columns, values=values
         )
 
+    @_cudf_nvtx_annotate
+    @copy_docstring(reshape.pivot_table)
+    def pivot_table(
+        self,
+        values=None,
+        index=None,
+        columns=None,
+        aggfunc="mean",
+        fill_value=None,
+        margins=False,
+        dropna=None,
+        margins_name="All",
+        observed=False,
+        sort=True,
+    ):
+        return cudf.core.reshape.pivot_table(
+            self,
+            values=values,
+            index=index,
+            columns=columns,
+            aggfunc=aggfunc,
+            fill_value=fill_value,
+            margins=margins,
+            dropna=dropna,
+            margins_name=margins_name,
+            observed=observed,
+            sort=sort,
+        )
+
     @_cudf_nvtx_annotate
     @copy_docstring(reshape.unstack)
     def unstack(self, level=-1, fill_value=None):
diff --git a/python/cudf/cudf/tests/test_reshape.py b/python/cudf/cudf/tests/test_reshape.py
index df03104eda4..181bff8512a 100644
--- a/python/cudf/cudf/tests/test_reshape.py
+++ b/python/cudf/cudf/tests/test_reshape.py
@@ -596,6 +596,42 @@ def test_pivot_table_simple(data, aggfunc, fill_value):
     assert_eq(expected, actual, check_dtype=False)
 
 
+@pytest.mark.parametrize(
+    "data",
+    [
+        {
+            "A": ["one", "one", "two", "three"] * 6,
+            "B": ["A", "B", "C"] * 8,
+            "C": ["foo", "foo", "foo", "bar", "bar", "bar"] * 4,
+            "D": np.random.randn(24),
+            "E": np.random.randn(24),
+        }
+    ],
+)
+@pytest.mark.parametrize(
+    "aggfunc", ["mean", "count", {"D": "sum", "E": "count"}]
+)
+@pytest.mark.parametrize("fill_value", [0])
+def test_dataframe_pivot_table_simple(data, aggfunc, fill_value):
+    pdf = pd.DataFrame(data)
+    expected = pdf.pivot_table(
+        values=["D", "E"],
+        index=["A", "B"],
+        columns=["C"],
+        aggfunc=aggfunc,
+        fill_value=fill_value,
+    )
+    cdf = cudf.DataFrame(data)
+    actual = cdf.pivot_table(
+        values=["D", "E"],
+        index=["A", "B"],
+        columns=["C"],
+        aggfunc=aggfunc,
+        fill_value=fill_value,
+    )
+    assert_eq(expected, actual, check_dtype=False)
+
+
 def test_crosstab_simple():
     a = np.array(
         [

From ee534582aee7e0b2a30b8b8ac4ad6ff2d6ba2f36 Mon Sep 17 00:00:00 2001
From: Ben Jarmak <104460670+jarmak-nv@users.noreply.github.com>
Date: Fri, 28 Oct 2022 08:43:31 -0500
Subject: [PATCH 092/202] New GHA to add issues/prs to project board (#12016)

This PR adds a small GitHub action to automatically add new issues and PRs to the cudf GitHub project. It does not impact existing issues/PRs.

Authors:
  - Ben Jarmak (https://github.com/jarmak-nv)

Approvers:
  - Ashwin Srinath (https://github.com/shwina)
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Jordan Jacobelli (https://github.com/Ethyling)

URL: https://github.com/rapidsai/cudf/pull/12016
---
 .github/workflows/add_to_project.yml | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 .github/workflows/add_to_project.yml

diff --git a/.github/workflows/add_to_project.yml b/.github/workflows/add_to_project.yml
new file mode 100644
index 00000000000..60f9d1e88d7
--- /dev/null
+++ b/.github/workflows/add_to_project.yml
@@ -0,0 +1,20 @@
+name: Add new issue/PR to project
+
+on:
+  issues:
+    types:
+      - opened
+      
+  pull_request_target:
+    types:
+      - opened
+
+jobs:
+  add-to-project:
+    name: Add issue or PR to project
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/add-to-project@v0.3.0
+        with:
+          project-url: https://github.com/orgs/rapidsai/projects/51
+          github-token: ${{ secrets.ADD_TO_PROJECT_GITHUB_TOKEN }}

From c91552363231a552c0d85acb41791de2c0a2c4e5 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 28 Oct 2022 08:45:23 -0700
Subject: [PATCH 093/202] Add deprecation warning for set_allocator. (#11958)

Resolves #11097.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Ashwin Srinath (https://github.com/shwina)

URL: https://github.com/rapidsai/cudf/pull/11958
---
 python/cudf/cudf/utils/utils.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py
index 63bc6d59524..87596482d79 100644
--- a/python/cudf/cudf/utils/utils.py
+++ b/python/cudf/cudf/utils/utils.py
@@ -4,6 +4,7 @@
 import hashlib
 import os
 import traceback
+import warnings
 from functools import partial
 from typing import FrozenSet, Set, Union
 
@@ -213,6 +214,15 @@ def set_allocator(
         Enable logging (default ``False``).
         Enabling this option will introduce performance overhead.
     """
+    warnings.warn(
+        "The cudf.set_allocator function is deprecated and will be removed in "
+        "a future release. Please use rmm.reinitialize "
+        "(https://docs.rapids.ai/api/rmm/stable/api.html#rmm.reinitialize) "
+        'instead. Note that `cudf.set_allocator(allocator="managed")` is '
+        "equivalent to `rmm.reinitialize(managed_memory=True)`.",
+        FutureWarning,
+    )
+
     use_managed_memory = allocator == "managed"
 
     rmm.reinitialize(

From aaf251daa1fca7ce34b49d32a8e489ba19b621b7 Mon Sep 17 00:00:00 2001
From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com>
Date: Fri, 28 Oct 2022 22:27:30 +0530
Subject: [PATCH 094/202] Performance improvement in JSON Tree traversal
 (#11919)

This PR improves performance of JSON Tree traversal - mainly in creation of column id.
- Replaced per-level processing with two-level hash algorithm
- Reduced memory usage for hash map (reduced oversubscription)

Other changes are
- Fail if the token stream contains an error token during tree generation
- Created device_span version of device_parse_nested_json

Reaches 2 GB/s on a GV100 when parsing a 128 MB JSON input.

Authors:
  - Karthikeyan (https://github.com/karthikeyann)

Approvers:
  - Tobias Ribizel (https://github.com/upsj)
  - Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/11919
---
 cpp/src/io/json/json_column.cu  |  20 +-
 cpp/src/io/json/json_tree.cu    | 569 +++++++++++++++-----------------
 cpp/src/io/json/nested_json.hpp |   2 +-
 cpp/tests/io/json_tree.cpp      |  20 ++
 4 files changed, 296 insertions(+), 315 deletions(-)

diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu
index fb7091018a6..cee023a1061 100644
--- a/cpp/src/io/json/json_column.cu
+++ b/cpp/src/io/json/json_column.cu
@@ -722,16 +722,13 @@ std::pair<std::unique_ptr<column>, std::vector<column_name_info>> device_json_co
   }
 }
 
-table_with_metadata device_parse_nested_json(host_span<SymbolT const> input,
+table_with_metadata device_parse_nested_json(device_span<SymbolT const> d_input,
                                              cudf::io::json_reader_options const& options,
                                              rmm::cuda_stream_view stream,
                                              rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
 
-  // Allocate device memory for the JSON input & copy over to device
-  rmm::device_uvector<SymbolT> d_input = cudf::detail::make_device_uvector_async(input, stream);
-
   auto gpu_tree = [&]() {
     // Parse the JSON and get the token stream
     const auto [tokens_gpu, token_indices_gpu] = get_token_stream(d_input, options, stream);
@@ -739,7 +736,8 @@ table_with_metadata device_parse_nested_json(host_span<SymbolT const> input,
     return get_tree_representation(tokens_gpu, token_indices_gpu, stream);
   }();  // IILE used to free memory of token data.
 #ifdef NJP_DEBUG_PRINT
-  print_tree(input, gpu_tree, stream);
+  auto h_input = cudf::detail::make_host_vector_async(d_input, stream);
+  print_tree(h_input, gpu_tree, stream);
 #endif
 
   auto [gpu_col_id, gpu_row_offsets] = records_orient_tree_traversal(d_input, gpu_tree, stream);
@@ -841,5 +839,17 @@ table_with_metadata device_parse_nested_json(host_span<SymbolT const> input,
                              {{}, out_column_names}};
 }
 
+table_with_metadata device_parse_nested_json(host_span<SymbolT const> input,
+                                             cudf::io::json_reader_options const& options,
+                                             rmm::cuda_stream_view stream,
+                                             rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+
+  // Allocate device memory for the JSON input & copy over to device
+  rmm::device_uvector<SymbolT> d_input = cudf::detail::make_device_uvector_async(input, stream);
+
+  return device_parse_nested_json(device_span<SymbolT const>{d_input}, options, stream, mr);
+}
 }  // namespace detail
 }  // namespace cudf::io::json
diff --git a/cpp/src/io/json/json_tree.cu b/cpp/src/io/json/json_tree.cu
index 8be298c6a8b..50755724c51 100644
--- a/cpp/src/io/json/json_tree.cu
+++ b/cpp/src/io/json/json_tree.cu
@@ -36,15 +36,18 @@
 #include <rmm/exec_policy.hpp>
 #include <rmm/mr/device/polymorphic_allocator.hpp>
 
+#include <thrust/binary_search.h>
 #include <thrust/copy.h>
 #include <thrust/count.h>
 #include <thrust/fill.h>
 #include <thrust/gather.h>
 #include <thrust/iterator/counting_iterator.h>
+#include <thrust/iterator/discard_iterator.h>
 #include <thrust/iterator/permutation_iterator.h>
 #include <thrust/iterator/transform_output_iterator.h>
 #include <thrust/iterator/zip_iterator.h>
 #include <thrust/reduce.h>
+#include <thrust/remove.h>
 #include <thrust/scan.h>
 #include <thrust/sequence.h>
 #include <thrust/sort.h>
@@ -82,7 +85,7 @@ struct node_ranges {
   __device__ auto operator()(size_type i) -> thrust::tuple<SymbolOffsetT, SymbolOffsetT>
   {
     // Whether a token expects to be followed by its respective end-of-* token partner
-    auto is_begin_of_section = [] __device__(PdaTokenT const token) {
+    auto const is_begin_of_section = [] __device__(PdaTokenT const token) {
       switch (token) {
         case token_t::StringBegin:
         case token_t::ValueBegin:
@@ -91,7 +94,7 @@ struct node_ranges {
       };
     };
     // The end-of-* partner token for a given beginning-of-* token
-    auto end_of_partner = [] __device__(PdaTokenT const token) {
+    auto const end_of_partner = [] __device__(PdaTokenT const token) {
       switch (token) {
         case token_t::StringBegin: return token_t::StringEnd;
         case token_t::ValueBegin: return token_t::ValueEnd;
@@ -101,8 +104,8 @@ struct node_ranges {
     };
     // Includes quote char for end-of-string token or Skips the quote char for
     // beginning-of-field-name token
-    auto get_token_index = [include_quote_char = include_quote_char] __device__(
-                             PdaTokenT const token, SymbolOffsetT const token_index) {
+    auto const get_token_index = [include_quote_char = include_quote_char] __device__(
+                                   PdaTokenT const token, SymbolOffsetT const token_index) {
       constexpr SymbolOffsetT quote_char_size = 1;
       switch (token) {
         // Strip off quote char included for StringBegin
@@ -211,7 +214,7 @@ tree_meta_t get_tree_representation(device_span<PdaTokenT const> tokens,
 {
   CUDF_FUNC_RANGE();
   // Whether a token does represent a node in the tree representation
-  auto is_node = [] __device__(PdaTokenT const token) -> bool {
+  auto const is_node = [] __device__(PdaTokenT const token) -> bool {
     switch (token) {
       case token_t::StructBegin:
       case token_t::ListBegin:
@@ -224,7 +227,7 @@ tree_meta_t get_tree_representation(device_span<PdaTokenT const> tokens,
   };
 
   // Whether the token pops from the parent node stack
-  auto does_pop = [] __device__(PdaTokenT const token) -> bool {
+  auto const does_pop = [] __device__(PdaTokenT const token) -> bool {
     switch (token) {
       case token_t::StructMemberEnd:
       case token_t::StructEnd:
@@ -234,7 +237,7 @@ tree_meta_t get_tree_representation(device_span<PdaTokenT const> tokens,
   };
 
   // Whether the token pushes onto the parent node stack
-  auto does_push = [] __device__(PdaTokenT const token) -> bool {
+  auto const does_push = [] __device__(PdaTokenT const token) -> bool {
     switch (token) {
       case token_t::FieldNameBegin:
       case token_t::StructBegin:
@@ -243,27 +246,45 @@ tree_meta_t get_tree_representation(device_span<PdaTokenT const> tokens,
     };
   };
 
-  auto num_tokens = tokens.size();
-  auto num_nodes  = thrust::count_if(
-    rmm::exec_policy(stream), tokens.begin(), tokens.begin() + num_tokens, is_node);
+  // Look for ErrorBegin and report the point of error.
+  if (auto const error_count =
+        thrust::count(rmm::exec_policy(stream), tokens.begin(), tokens.end(), token_t::ErrorBegin);
+      error_count > 0) {
+    auto const error_location =
+      thrust::find(rmm::exec_policy(stream), tokens.begin(), tokens.end(), token_t::ErrorBegin);
+    SymbolOffsetT error_index;
+    CUDF_CUDA_TRY(
+      cudaMemcpyAsync(&error_index,
+                      token_indices.data() + thrust::distance(tokens.begin(), error_location),
+                      sizeof(SymbolOffsetT),
+                      cudaMemcpyDeviceToHost,
+                      stream.value()));
+    stream.synchronize();
+    CUDF_FAIL("JSON Parser encountered an invalid format at location " +
+              std::to_string(error_index));
+  }
+
+  auto const num_tokens = tokens.size();
+  auto const num_nodes =
+    thrust::count_if(rmm::exec_policy(stream), tokens.begin(), tokens.end(), is_node);
 
   // Node levels: transform_exclusive_scan, copy_if.
   rmm::device_uvector<TreeDepthT> node_levels(num_nodes, stream, mr);
   {
     rmm::device_uvector<TreeDepthT> token_levels(num_tokens, stream);
-    auto push_pop_it = thrust::make_transform_iterator(
+    auto const push_pop_it = thrust::make_transform_iterator(
       tokens.begin(), [does_push, does_pop] __device__(PdaTokenT const token) -> size_type {
         return does_push(token) - does_pop(token);
       });
     thrust::exclusive_scan(
       rmm::exec_policy(stream), push_pop_it, push_pop_it + num_tokens, token_levels.begin());
 
-    auto node_levels_end = thrust::copy_if(rmm::exec_policy(stream),
-                                           token_levels.begin(),
-                                           token_levels.begin() + num_tokens,
-                                           tokens.begin(),
-                                           node_levels.begin(),
-                                           is_node);
+    auto const node_levels_end = thrust::copy_if(rmm::exec_policy(stream),
+                                                 token_levels.begin(),
+                                                 token_levels.end(),
+                                                 tokens.begin(),
+                                                 node_levels.begin(),
+                                                 is_node);
     CUDF_EXPECTS(thrust::distance(node_levels.begin(), node_levels_end) == num_nodes,
                  "node level count mismatch");
   }
@@ -287,8 +308,8 @@ tree_meta_t get_tree_representation(device_span<PdaTokenT const> tokens,
     // if previous node is SMB and its previous node is a push, then i-2
     // eg. `{ SMB FB FE VB VE SME` -> `{` index as FB's parent.
     // else -1
-    auto first_childs_parent_token_id = [tokens_gpu =
-                                           tokens.begin()] __device__(auto i) -> NodeIndexT {
+    auto const first_childs_parent_token_id = [tokens_gpu =
+                                                 tokens.begin()] __device__(auto i) -> NodeIndexT {
       if (i <= 0) { return -1; }
       if (tokens_gpu[i - 1] == token_t::StructBegin or tokens_gpu[i - 1] == token_t::ListBegin) {
         return i - 1;
@@ -310,7 +331,7 @@ tree_meta_t get_tree_representation(device_span<PdaTokenT const> tokens,
       parent_node_ids.begin(),
       [node_ids_gpu = node_token_ids.begin(), num_nodes, first_childs_parent_token_id] __device__(
         NodeIndexT const tid) -> NodeIndexT {
-        auto pid = first_childs_parent_token_id(tid);
+        auto const pid = first_childs_parent_token_id(tid);
         return pid < 0
                  ? parent_node_sentinel
                  : thrust::lower_bound(thrust::seq, node_ids_gpu, node_ids_gpu + num_nodes, pid) -
@@ -326,28 +347,25 @@ tree_meta_t get_tree_representation(device_span<PdaTokenT const> tokens,
 
   // Node categories: copy_if with transform.
   rmm::device_uvector<NodeT> node_categories(num_nodes, stream, mr);
-  auto node_categories_it =
+  auto const node_categories_it =
     thrust::make_transform_output_iterator(node_categories.begin(), token_to_node{});
-  auto node_categories_end = thrust::copy_if(rmm::exec_policy(stream),
-                                             tokens.begin(),
-                                             tokens.begin() + num_tokens,
-                                             node_categories_it,
-                                             is_node);
+  auto const node_categories_end = thrust::copy_if(
+    rmm::exec_policy(stream), tokens.begin(), tokens.end(), node_categories_it, is_node);
   CUDF_EXPECTS(node_categories_end - node_categories_it == num_nodes,
                "node category count mismatch");
 
   // Node ranges: copy_if with transform.
   rmm::device_uvector<SymbolOffsetT> node_range_begin(num_nodes, stream, mr);
   rmm::device_uvector<SymbolOffsetT> node_range_end(num_nodes, stream, mr);
-  auto node_range_tuple_it =
+  auto const node_range_tuple_it =
     thrust::make_zip_iterator(node_range_begin.begin(), node_range_end.begin());
   // Whether the tokenizer stage should keep quote characters for string values
   // If the tokenizer keeps the quote characters, they may be stripped during type casting
   constexpr bool include_quote_char = true;
-  auto node_range_out_it            = thrust::make_transform_output_iterator(
+  auto const node_range_out_it      = thrust::make_transform_output_iterator(
     node_range_tuple_it, node_ranges{tokens, token_indices, include_quote_char});
 
-  auto node_range_out_end =
+  auto const node_range_out_end =
     thrust::copy_if(rmm::exec_policy(stream),
                     thrust::make_counting_iterator<size_type>(0),
                     thrust::make_counting_iterator<size_type>(0) + num_tokens,
@@ -383,38 +401,45 @@ rmm::device_uvector<size_type> hash_node_type_with_field_name(device_span<Symbol
   using hash_table_allocator_type = rmm::mr::stream_allocator_adaptor<default_allocator<char>>;
   using hash_map_type =
     cuco::static_map<size_type, size_type, cuda::thread_scope_device, hash_table_allocator_type>;
-  auto num_nodes = d_tree.node_categories.size();
+
+  auto const num_nodes  = d_tree.node_categories.size();
+  auto const num_fields = thrust::count(rmm::exec_policy(stream),
+                                        d_tree.node_categories.begin(),
+                                        d_tree.node_categories.end(),
+                                        node_t::NC_FN);
 
   constexpr size_type empty_node_index_sentinel = -1;
-  hash_map_type key_map{compute_hash_table_size(num_nodes),  // TODO reduce oversubscription
+  hash_map_type key_map{compute_hash_table_size(num_fields, 40),  // 40% occupancy in hash map
                         cuco::sentinel::empty_key{empty_node_index_sentinel},
                         cuco::sentinel::empty_value{empty_node_index_sentinel},
                         hash_table_allocator_type{default_allocator<char>{}, stream},
                         stream.value()};
-  auto d_hasher = [d_input          = d_input.data(),
-                   node_range_begin = d_tree.node_range_begin.data(),
-                   node_range_end   = d_tree.node_range_end.data()] __device__(auto node_id) {
+  auto const d_hasher = [d_input          = d_input.data(),
+                         node_range_begin = d_tree.node_range_begin.data(),
+                         node_range_end   = d_tree.node_range_end.data()] __device__(auto node_id) {
     auto const field_name = cudf::string_view(d_input + node_range_begin[node_id],
                                               node_range_end[node_id] - node_range_begin[node_id]);
     return cudf::detail::default_hash<cudf::string_view>{}(field_name);
   };
-  auto d_equal = [d_input          = d_input.data(),
-                  node_range_begin = d_tree.node_range_begin.data(),
-                  node_range_end   = d_tree.node_range_end.data()] __device__(auto node_id1,
-                                                                            auto node_id2) {
+  auto const d_equal = [d_input          = d_input.data(),
+                        node_range_begin = d_tree.node_range_begin.data(),
+                        node_range_end   = d_tree.node_range_end.data()] __device__(auto node_id1,
+                                                                                  auto node_id2) {
     auto const field_name1 = cudf::string_view(
       d_input + node_range_begin[node_id1], node_range_end[node_id1] - node_range_begin[node_id1]);
     auto const field_name2 = cudf::string_view(
       d_input + node_range_begin[node_id2], node_range_end[node_id2] - node_range_begin[node_id2]);
     return field_name1 == field_name2;
   };
-  auto is_field_name_node = [node_categories = d_tree.node_categories.data()] __device__(
-                              auto node_id) { return node_categories[node_id] == node_t::NC_FN; };
   // key-value pairs: uses node_id itself as node_type. (unique node_id for a field name due to
   // hashing)
-  auto iter = cudf::detail::make_counting_transform_iterator(
+  auto const iter = cudf::detail::make_counting_transform_iterator(
     0, [] __device__(size_type i) { return cuco::make_pair(i, i); });
 
+  auto const is_field_name_node = [node_categories =
+                                     d_tree.node_categories.data()] __device__(auto node_id) {
+    return node_categories[node_id] == node_t::NC_FN;
+  };
   key_map.insert_if(iter,
                     iter + num_nodes,
                     thrust::counting_iterator<size_type>(0),  // stencil
@@ -422,9 +447,10 @@ rmm::device_uvector<size_type> hash_node_type_with_field_name(device_span<Symbol
                     d_hasher,
                     d_equal,
                     stream.value());
-  auto get_hash_value =
+
+  auto const get_hash_value =
     [key_map = key_map.get_device_view(), d_hasher, d_equal] __device__(auto node_id) -> size_type {
-    auto it = key_map.find(node_id, d_hasher, d_equal);
+    auto const it = key_map.find(node_id, d_hasher, d_equal);
     return (it == key_map.end()) ? size_type{0} : it->second.load(cuda::std::memory_order_relaxed);
   };
 
@@ -444,211 +470,225 @@ rmm::device_uvector<size_type> hash_node_type_with_field_name(device_span<Symbol
   return node_type;
 }
 
-/**
- * @brief Translates sorted parent_node_ids to parent_indices with indices from scatter_indices
- *
- * @param scatter_indices The sorted order of parent_node_ids
- * @param parent_node_ids The sorted parent_node_ids
- * @param stream CUDA stream used for device memory operations and kernel launches
- * @return Translated parent_indices pointing to sorted node_ids positions
- */
-rmm::device_uvector<NodeIndexT> translate_sorted_parent_node_indices(
-  device_span<size_type const> scatter_indices,
+// Two level hashing algorithm
+// 1. Convert node_category+fieldname to node_type. (passed as argument)
+//   a. Create a hashmap to hash field name and assign unique node id as values.
+//   b. Convert the node categories to node types.
+//      Node type is defined as node category enum value if it is not a field node,
+//      otherwise it is the unique node id assigned by the hashmap (value shifted by #NUM_CATEGORY).
+// 2. Set operation on entire path of each node
+//   a. Create a hash map that hashes {node_level, node_type} of each node and of all its
+//      ancestors up to the root.
+//   b. While creating the hash map, transform node ids to the unique node ids inserted into the
+//      hash map. This mimics a set operation. These unique node ids are the set ids.
+//   c. Return these converted set ids, which are the hash map keys/values, and unique set ids.
+std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> hash_node_path(
+  device_span<TreeDepthT const> node_levels,
+  device_span<size_type const> node_type,
   device_span<NodeIndexT const> parent_node_ids,
-  rmm::cuda_stream_view stream)
+  rmm::cuda_stream_view stream,
+  rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  auto const num_nodes      = scatter_indices.size();
-  auto const gather_indices = cudf::detail::scatter_to_gather(
-    scatter_indices.begin(), scatter_indices.end(), num_nodes, stream);
+  auto const num_nodes = parent_node_ids.size();
+  rmm::device_uvector<size_type> col_id(num_nodes, stream, mr);
 
-  rmm::device_uvector<NodeIndexT> parent_indices(num_nodes, stream);
-  // gather, except parent sentinels
-  thrust::transform(rmm::exec_policy(stream),
-                    parent_node_ids.begin(),
-                    parent_node_ids.end(),
-                    parent_indices.begin(),
-                    [gather_indices = gather_indices.data()] __device__(auto parent_node_id) {
-                      return (parent_node_id == parent_node_sentinel)
-                               ? parent_node_sentinel
-                               : gather_indices[parent_node_id];
-                    });
-  return parent_indices;
-};
+  using hash_table_allocator_type = rmm::mr::stream_allocator_adaptor<default_allocator<char>>;
+  using hash_map_type =
+    cuco::static_map<size_type, size_type, cuda::thread_scope_device, hash_table_allocator_type>;
+
+  constexpr size_type empty_node_index_sentinel = -1;
+  hash_map_type key_map{compute_hash_table_size(num_nodes),  // TODO reduce oversubscription
+                        cuco::sentinel::empty_key{empty_node_index_sentinel},
+                        cuco::sentinel::empty_value{empty_node_index_sentinel},
+                        cuco::sentinel::erased_key{-2},
+                        hash_table_allocator_type{default_allocator<char>{}, stream},
+                        stream.value()};
+  // path compression is not used since extra writes make all map operations slow.
+  auto const d_hasher = [node_level      = node_levels.begin(),
+                         node_type       = node_type.begin(),
+                         parent_node_ids = parent_node_ids.begin()] __device__(auto node_id) {
+    auto hash =
+      cudf::detail::hash_combine(cudf::detail::default_hash<TreeDepthT>{}(node_level[node_id]),
+                                 cudf::detail::default_hash<size_type>{}(node_type[node_id]));
+    node_id = parent_node_ids[node_id];
+    while (node_id != parent_node_sentinel) {
+      hash = cudf::detail::hash_combine(
+        hash, cudf::detail::default_hash<TreeDepthT>{}(node_level[node_id]));
+      hash = cudf::detail::hash_combine(
+        hash, cudf::detail::default_hash<size_type>{}(node_type[node_id]));
+      node_id = parent_node_ids[node_id];
+    }
+    return hash;
+  };
+
+  rmm::device_uvector<hash_value_type> node_hash(num_nodes, stream);
+  thrust::tabulate(rmm::exec_policy(stream), node_hash.begin(), node_hash.end(), d_hasher);
+  auto const d_hashed_cache = [node_hash = node_hash.begin()] __device__(auto node_id) {
+    return node_hash[node_id];
+  };
+
+  auto const d_equal = [node_level      = node_levels.begin(),
+                        node_type       = node_type.begin(),
+                        parent_node_ids = parent_node_ids.begin(),
+                        d_hashed_cache] __device__(auto node_id1, auto node_id2) {
+    if (node_id1 == node_id2) return true;
+    if (d_hashed_cache(node_id1) != d_hashed_cache(node_id2)) return false;
+    auto const is_equal_level = [node_level, node_type](auto node_id1, auto node_id2) {
+      if (node_id1 == node_id2) return true;
+      return node_level[node_id1] == node_level[node_id2] and
+             node_type[node_id1] == node_type[node_id2];
+    };
+    // if both nodes have the same node types at all levels, this walks up until a common parent
+    // or the root is reached.
+    while (node_id1 != parent_node_sentinel and node_id2 != parent_node_sentinel and
+           node_id1 != node_id2 and is_equal_level(node_id1, node_id2)) {
+      node_id1 = parent_node_ids[node_id1];
+      node_id2 = parent_node_ids[node_id2];
+    }
+    return node_id1 == node_id2;
+  };
+
+  // insert and convert node ids to unique set ids
+  auto const num_inserted = thrust::count_if(
+    rmm::exec_policy(stream),
+    thrust::make_counting_iterator<size_type>(0),
+    thrust::make_counting_iterator<size_type>(num_nodes),
+    [d_hashed_cache,
+     d_equal,
+     view       = key_map.get_device_mutable_view(),
+     uq_node_id = col_id.begin()] __device__(auto node_id) mutable {
+      auto it = view.insert_and_find(cuco::make_pair(node_id, node_id), d_hashed_cache, d_equal);
+      uq_node_id[node_id] = (it.first)->first.load(cuda::std::memory_order_relaxed);
+      return it.second;
+    });
+
+  auto const num_columns = num_inserted;  // key_map.get_size() is not updated.
+  rmm::device_uvector<size_type> unique_keys(num_columns, stream);
+  key_map.retrieve_all(unique_keys.begin(), thrust::make_discard_iterator(), stream.value());
+
+  return {std::move(col_id), std::move(unique_keys)};
+}
 
 /**
- * @brief Generates column id and parent column id for each node from the node_level sorted inputs
+ * @brief Generates column id and parent column id for each node
  *
- * 4. Per-Level Processing: Propagate parent node ids for each level.
- *   For each level,
- *     a. gather col_id from previous level results. input=col_id, gather_map is parent_indices.
- *     b. stable sort by {parent_col_id, node_type}
- *     c. scan sum of unique {parent_col_id, node_type}
- *     d. scatter the col_id back to stable node_level order (using scatter_indices)
+ * 1. Generate col_id:
+ *    a. Set operation on entire path of each node, translate each node id to set id.
+ *       (two level hashing)
+ *    b. gather unique set ids.
+ *    c. sort and use binary search to generate column ids.
+ *    d. Translate parent node ids to parent column ids.
  *
- * pre-condition: All input arguments are stable sorted by node_level (stable in node_id order)
- * post-condition: Returned column_id, parent_col_id are level sorted.
- * @param node_type Unique id to identify node type, field with different name has different id.
- * @param parent_indices Parent node indices in the sorted node_level order
- * @param d_level_boundaries The boundaries of each level in the sorted node_level order
+ * All inputs and outputs are in node_id order.
+ * @param d_input JSON string in device memory
+ * @param d_tree Tree representation of the JSON
  * @param stream CUDA stream used for device memory operations and kernel launches
  * @param mr Device memory resource used to allocate the returned column's device memory
  * @return column_id, parent_column_id
  */
 std::pair<rmm::device_uvector<NodeIndexT>, rmm::device_uvector<NodeIndexT>> generate_column_id(
-  device_span<size_type> node_type,        // level sorted
-  device_span<NodeIndexT> parent_indices,  // level sorted
-  device_span<size_type const> d_level_boundaries,
+  device_span<SymbolT const> d_input,
+  tree_meta_t const& d_tree,
   rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
+  auto const num_nodes = d_tree.node_categories.size();
 
-  auto const num_nodes = node_type.size();
-  rmm::device_uvector<NodeIndexT> col_id(num_nodes, stream, mr);
-  rmm::device_uvector<NodeIndexT> parent_col_id(num_nodes, stream);
-  if (num_nodes == 0) { return {std::move(col_id), std::move(parent_col_id)}; }
-  rmm::device_uvector<size_type> scatter_indices(num_nodes, stream);
-  thrust::sequence(rmm::exec_policy(stream), scatter_indices.begin(), scatter_indices.end());
-  // scatter 1 to level_boundaries alone, useful for scan later
-  thrust::scatter(rmm::exec_policy(stream),
-                  thrust::make_constant_iterator(1),
-                  thrust::make_constant_iterator(1) + d_level_boundaries.size() - 1,
-                  d_level_boundaries.begin(),
-                  col_id.begin());
-  auto level_boundaries = cudf::detail::make_std_vector_async(d_level_boundaries, stream);
-  // Initialize First level node's node col_id to 0
-  thrust::fill(rmm::exec_policy(stream), col_id.begin(), col_id.begin() + level_boundaries[0], 0);
-  // Initialize First level node's parent_col_id to parent_node_sentinel sentinel
-  thrust::fill(rmm::exec_policy(stream),
-               parent_col_id.begin(),
-               parent_col_id.begin() + level_boundaries[0],
-               parent_node_sentinel);
-
-  // Per-level processing
-  auto const num_levels = level_boundaries.size();
-  for (size_t level = 1; level < num_levels; level++) {
-    // Gather the each node's parent's column id for the nodes of the current level
-    thrust::gather(rmm::exec_policy(stream),
-                   parent_indices.data() + level_boundaries[level - 1],
-                   parent_indices.data() + level_boundaries[level],
-                   col_id.data(),
-                   parent_col_id.data() + level_boundaries[level - 1]);
-
-    // To invoke Radix sort for keys {parent_col_id, node_type} instead of merge sort,
-    // we need to split to 2 Radix sorts.
-    // Secondary sort on node_type
-
-    thrust::stable_sort_by_key(
-      rmm::exec_policy(stream),
-      node_type.data() + level_boundaries[level - 1],
-      node_type.data() + level_boundaries[level],
-      thrust::make_zip_iterator(parent_col_id.begin() + level_boundaries[level - 1],
-                                scatter_indices.begin()));
-    // Primary sort on parent_col_id
-    thrust::stable_sort_by_key(
-      rmm::exec_policy(stream),
-      parent_col_id.begin() + level_boundaries[level - 1],
-      parent_col_id.begin() + level_boundaries[level],
-      thrust::make_zip_iterator(node_type.data() + level_boundaries[level - 1],
-                                scatter_indices.begin()));
-
-    auto start_it = thrust::make_zip_iterator(parent_col_id.begin() + level_boundaries[level - 1],
-                                              node_type.data() + level_boundaries[level - 1]);
-    auto adjacent_pair_it = thrust::make_zip_iterator(start_it - 1, start_it);
-    // Compares two adjacent items, beginning with the first and second item from the current level.
-    // Writes flags to the index of the rhs item.
-    // First index holds next col_id from previous level.
-    thrust::transform(rmm::exec_policy(stream),
-                      adjacent_pair_it + 1,
-                      adjacent_pair_it + level_boundaries[level] - level_boundaries[level - 1],
-                      col_id.data() + level_boundaries[level - 1] + 1,
-                      [] __device__(auto adjacent_pair) -> size_type {
-                        auto const lhs = thrust::get<0>(adjacent_pair);
-                        auto const rhs = thrust::get<1>(adjacent_pair);
-                        return lhs != rhs ? 1 : 0;
-                      });
-
-    // includes previous level last col_id to continue the index.
-    thrust::inclusive_scan(rmm::exec_policy(stream),
-                           col_id.data() + level_boundaries[level - 1],
-                           col_id.data() + level_boundaries[level] + (level != num_levels - 1),
-                           // +1 only for not-last-levels, for next level start col_id
-                           col_id.data() + level_boundaries[level - 1]);
-
-    // scatter to restore original order.
-    auto const num_nodes_per_level = level_boundaries[level] - level_boundaries[level - 1];
-    {
-      rmm::device_uvector<NodeIndexT> tmp_col_id(num_nodes_per_level, stream);
-      rmm::device_uvector<NodeIndexT> tmp_parent_col_id(num_nodes_per_level, stream);
-      thrust::scatter(rmm::exec_policy(stream),
-                      thrust::make_zip_iterator(col_id.begin() + level_boundaries[level - 1],
-                                                parent_col_id.data() + level_boundaries[level - 1]),
-                      thrust::make_zip_iterator(col_id.begin() + level_boundaries[level],
-                                                parent_col_id.data() + level_boundaries[level]),
-                      scatter_indices.begin(),
-                      thrust::make_zip_iterator(tmp_col_id.begin(), tmp_parent_col_id.begin()));
-      thrust::copy(rmm::exec_policy(stream),
-                   tmp_col_id.begin(),
-                   tmp_col_id.end(),
-                   col_id.begin() + level_boundaries[level - 1]);
-      thrust::copy(rmm::exec_policy(stream),
-                   tmp_parent_col_id.begin(),
-                   tmp_parent_col_id.end(),
-                   parent_col_id.begin() + level_boundaries[level - 1]);
-    }
-    thrust::sequence(rmm::exec_policy(stream),
-                     scatter_indices.begin(),
-                     scatter_indices.begin() + num_nodes_per_level);
-  }
+  // Two level hashing:
+  //   one for field names -> node_type and,
+  //   another for {node_level, node_category} + field hash for the entire path
+  //    which is {node_level, node_type} recursively using parent_node_id
+  auto [col_id, unique_keys] = [&]() {
+    // Convert node_category + field_name to node_type.
+    rmm::device_uvector<size_type> node_type =
+      hash_node_type_with_field_name(d_input, d_tree, stream);
+
+    // hash entire path from node to root.
+    return hash_node_path(d_tree.node_levels, node_type, d_tree.parent_node_ids, stream, mr);
+  }();
+
+  thrust::sort(rmm::exec_policy(stream), unique_keys.begin(), unique_keys.end());
+  thrust::lower_bound(rmm::exec_policy(stream),
+                      unique_keys.begin(),
+                      unique_keys.end(),
+                      col_id.begin(),
+                      col_id.end(),
+                      col_id.begin());
+
+  rmm::device_uvector<size_type> parent_col_id(num_nodes, stream, mr);
+  thrust::transform(rmm::exec_policy(stream),
+                    d_tree.parent_node_ids.begin(),
+                    d_tree.parent_node_ids.end(),
+                    parent_col_id.begin(),
+                    [col_id = col_id.begin()] __device__(auto node_id) {
+                      return node_id >= 0 ? col_id[node_id] : parent_node_sentinel;
+                    });
 
   return {std::move(col_id), std::move(parent_col_id)};
 }
 
 /**
  * @brief Computes row indices of each node in the hierarchy.
- * 5. Generate row_offset.
- *   a. stable_sort by parent_col_id.
- *   b. scan_by_key {parent_col_id} (required only on nodes who's parent is list)
- *   c. propagate to non-list leaves from parent list node by recursion
+ * 2. Generate row_offset.
+ *   a. Extract only list children
+ *   b. stable_sort by parent_col_id.
+ *   c. scan_by_key {parent_col_id} (done only on nodes whose parent is a list)
+ *   d. propagate to non-list leaves from parent list node by recursion
  *
  * pre-condition:
- *  scatter_indices is a sequence, representing node_id.
  *  d_tree.node_categories, d_tree.parent_node_ids, parent_col_id are in order of node_id.
  * post-condition: row_offsets is in order of node_id.
- *  parent_col_id and scatter_indices are sorted by parent_col_id. (unused after this function)
- * @param scatter_indices node_id
+ *  parent_col_id is moved and reused inside this function.
  * @param parent_col_id parent node's column id
  * @param d_tree Tree representation of the JSON string
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return row_offsets
  */
-rmm::device_uvector<size_type> compute_row_offsets(device_span<size_type> scatter_indices,
-                                                   rmm::device_uvector<NodeIndexT>&& parent_col_id,
-                                                   tree_meta_t& d_tree,
+rmm::device_uvector<size_type> compute_row_offsets(rmm::device_uvector<NodeIndexT>&& parent_col_id,
+                                                   tree_meta_t const& d_tree,
                                                    rmm::cuda_stream_view stream,
                                                    rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
   auto const num_nodes = d_tree.node_categories.size();
-  // TODO generate scatter_indices sequences here itself
-  thrust::stable_sort_by_key(
-    rmm::exec_policy(stream), parent_col_id.begin(), parent_col_id.end(), scatter_indices.begin());
+
+  rmm::device_uvector<size_type> scatter_indices(num_nodes, stream);
+  thrust::sequence(rmm::exec_policy(stream), scatter_indices.begin(), scatter_indices.end());
+
+  // Extract only list children. (nodes whose parent is a list/root)
+  auto const list_parent_end =
+    thrust::remove_if(rmm::exec_policy(stream),
+                      thrust::make_zip_iterator(parent_col_id.begin(), scatter_indices.begin()),
+                      thrust::make_zip_iterator(parent_col_id.end(), scatter_indices.end()),
+                      d_tree.parent_node_ids.begin(),
+                      [node_categories = d_tree.node_categories.begin()] __device__(auto pnid) {
+                        return !(pnid == parent_node_sentinel || node_categories[pnid] == NC_LIST);
+                      });
+  auto const num_list_parent = thrust::distance(
+    thrust::make_zip_iterator(parent_col_id.begin(), scatter_indices.begin()), list_parent_end);
+
+  thrust::stable_sort_by_key(rmm::exec_policy(stream),
+                             parent_col_id.begin(),
+                             parent_col_id.begin() + num_list_parent,
+                             scatter_indices.begin());
+
   rmm::device_uvector<size_type> row_offsets(num_nodes, stream, mr);
   // TODO is it possible to generate list child_offsets too here?
-  thrust::exclusive_scan_by_key(
-    rmm::exec_policy(stream),
-    parent_col_id.begin(),  // TODO: is there any way to limit this to list parents alone?
-    parent_col_id.end(),
-    thrust::make_constant_iterator<size_type>(1),
-    row_offsets.begin());
+  // write only 1st child offset to parent node id child_offsets?
+  thrust::exclusive_scan_by_key(rmm::exec_policy(stream),
+                                parent_col_id.begin(),
+                                parent_col_id.begin() + num_list_parent,
+                                thrust::make_constant_iterator<size_type>(1),
+                                row_offsets.begin());
 
   // Using scatter instead of sort.
   auto& temp_storage = parent_col_id;  // reuse parent_col_id as temp storage
   thrust::scatter(rmm::exec_policy(stream),
                   row_offsets.begin(),
-                  row_offsets.end(),
+                  row_offsets.begin() + num_list_parent,
                   scatter_indices.begin(),
                   temp_storage.begin());
   row_offsets = std::move(temp_storage);
@@ -672,126 +712,37 @@ rmm::device_uvector<size_type> compute_row_offsets(device_span<size_type> scatte
     },
     [node_categories = d_tree.node_categories.data(),
      parent_node_ids = d_tree.parent_node_ids.begin()] __device__(size_type node_id) {
-      auto parent_node_id = parent_node_ids[node_id];
+      auto const parent_node_id = parent_node_ids[node_id];
       return parent_node_id != parent_node_sentinel and
              !(node_categories[parent_node_id] == node_t::NC_LIST);
     });
   return row_offsets;
 }
 
-/**
-@note
-This algorithm assigns a unique column id to each node in the tree.
-The row offset is the row index of the node in that column id.
-Algorithm:
-1. Convert node_category+fieldname to node_type.
-  a. Create a hashmap to hash field name and assign unique node id as values.
-  b. Convert the node categories to node types.
-     Node type is defined as node category enum value if it is not a field node,
-     otherwise it is the unique node id assigned by the hashmap (value shifted by #NUM_CATEGORY).
-2. Preprocessing: Translate parent node ids after sorting by level.
-  a. sort by level
-  b. get gather map of sorted indices
-  c. translate parent_node_ids to new sorted indices
-3. Find level boundaries.
-   copy_if index of first unique values of sorted levels.
-4. Per-Level Processing: Propagate parent node ids for each level.
-  For each level,
-    a. gather col_id from previous level results. input=col_id, gather_map is parent_indices.
-    b. stable sort by {parent_col_id, node_type}
-    c. scan sum of unique {parent_col_id, node_type}
-    d. scatter the col_id back to stable node_level order (using scatter_indices)
-  Restore original node_id order
-5. Generate row_offset.
-  a. stable_sort by parent_col_id.
-  b. scan_by_key {parent_col_id} (required only on nodes whose parent is a list)
-  c. propagate to non-list leaves from parent list node by recursion
-**/
+// This algorithm assigns a unique column id to each node in the tree.
+// The row offset is the row index of the node in that column id.
+// Algorithm:
+// 1. Generate col_id:
+//   a. Set operation on entire path of each node, translate each node id to set id.
+//   b. gather unique set ids.
+//   c. sort and use binary search to generate column ids.
+//   d. Translate parent node ids to parent column ids.
+// 2. Generate row_offset.
+//   a. filter only list children
+//   b. stable_sort by parent_col_id.
+//   c. scan_by_key {parent_col_id} (done only on nodes whose parent is a list)
+//   d. propagate to non-list leaves from parent list node by recursion
 std::tuple<rmm::device_uvector<NodeIndexT>, rmm::device_uvector<size_type>>
 records_orient_tree_traversal(device_span<SymbolT const> d_input,
-                              tree_meta_t& d_tree,
+                              tree_meta_t const& d_tree,
                               rmm::cuda_stream_view stream,
                               rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  // 1. Convert node_category + field_name to node_type.
-
-  auto num_nodes = d_tree.node_categories.size();
-  rmm::device_uvector<size_type> node_type =
-    hash_node_type_with_field_name(d_input, d_tree, stream);
-  // TODO two-level hashing:  one for field names
-  // and another for {node-level, node_category} + field hash for the entire path
-
-  // 2. Preprocessing: Translate parent node ids after sorting by level.
-  //   a. sort by level
-  //   b. get gather map of sorted indices
-  //   c. translate parent_node_ids to sorted indices
-
-  rmm::device_uvector<size_type> scatter_indices(num_nodes, stream);
-  thrust::sequence(rmm::exec_policy(stream), scatter_indices.begin(), scatter_indices.end());
-
-  rmm::device_uvector<NodeIndexT> parent_node_ids(d_tree.parent_node_ids, stream);  // make a copy
-  auto out_pid =
-    thrust::make_zip_iterator(scatter_indices.data(), parent_node_ids.data(), node_type.data());
-  // Uses cub radix sort. sort by level
-  thrust::stable_sort_by_key(rmm::exec_policy(stream),
-                             d_tree.node_levels.data(),
-                             d_tree.node_levels.data() + num_nodes,
-                             out_pid);
-
-  rmm::device_uvector<NodeIndexT> parent_indices =
-    translate_sorted_parent_node_indices(scatter_indices, parent_node_ids, stream);
-  // TODO optimize memory usage: parent_node_ids is no longer needed
-
-  // 3. Find level boundaries.
-  auto level_boundaries = [&]() {
-    if (d_tree.node_levels.is_empty()) return rmm::device_uvector<size_type>{0, stream};
-    // Already node_levels is sorted
-    auto max_level = d_tree.node_levels.back_element(stream);
-    rmm::device_uvector<size_type> level_boundaries(max_level + 1, stream);
-    // TODO try reduce_by_key
-    auto level_end =
-      thrust::copy_if(rmm::exec_policy(stream),
-                      thrust::make_counting_iterator<size_type>(1),
-                      thrust::make_counting_iterator<size_type>(num_nodes + 1),
-                      level_boundaries.begin(),
-                      [num_nodes, node_levels = d_tree.node_levels.begin()] __device__(auto index) {
-                        return index == num_nodes || node_levels[index] != node_levels[index - 1];
-                      });
-    CUDF_EXPECTS(thrust::distance(level_boundaries.begin(), level_end) == max_level + 1,
-                 "num_levels != max_level + 1");
-    return level_boundaries;
-  };
-
-  // 4. Per-Level Processing: Propagate parent node ids for each level.
-  auto [col_id, parent_col_id] = generate_column_id(node_type,       // level sorted
-                                                    parent_indices,  // level sorted
-                                                    level_boundaries(),
-                                                    stream,
-                                                    mr);
-
-  // restore original order of col_id, parent_col_id and used d_tree members
-  {
-    rmm::device_uvector<NodeIndexT> tmp_col_id(num_nodes, stream);
-    rmm::device_uvector<NodeIndexT> tmp_parent_col_id(num_nodes, stream);
-    rmm::device_uvector<TreeDepthT> tmp_node_levels(num_nodes, stream);
-    thrust::scatter(
-      rmm::exec_policy(stream),
-      thrust::make_zip_iterator(col_id.begin(), parent_col_id.begin(), d_tree.node_levels.begin()),
-      thrust::make_zip_iterator(col_id.end(), parent_col_id.end(), d_tree.node_levels.end()),
-      scatter_indices.begin(),
-      thrust::make_zip_iterator(
-        tmp_col_id.begin(), tmp_parent_col_id.begin(), tmp_node_levels.begin()));
-    col_id             = std::move(tmp_col_id);
-    parent_col_id      = std::move(tmp_parent_col_id);
-    d_tree.node_levels = std::move(tmp_node_levels);
-    thrust::sequence(rmm::exec_policy(stream), scatter_indices.begin(), scatter_indices.end());
-  }
+  auto [new_col_id, new_parent_col_id] = generate_column_id(d_input, d_tree, stream, mr);
 
-  // 5. Generate row_offset.
-  auto row_offsets =
-    compute_row_offsets(scatter_indices, std::move(parent_col_id), d_tree, stream, mr);
-  return std::tuple{std::move(col_id), std::move(row_offsets)};
+  auto row_offsets = compute_row_offsets(std::move(new_parent_col_id), d_tree, stream, mr);
+  return std::tuple{std::move(new_col_id), std::move(row_offsets)};
 }
 
 }  // namespace detail
diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp
index 8a0f3566d58..35c09c89d8b 100644
--- a/cpp/src/io/json/nested_json.hpp
+++ b/cpp/src/io/json/nested_json.hpp
@@ -322,7 +322,7 @@ tree_meta_t get_tree_representation(
 std::tuple<rmm::device_uvector<NodeIndexT>, rmm::device_uvector<size_type>>
 records_orient_tree_traversal(
   device_span<SymbolT const> d_input,
-  tree_meta_t& d_tree,
+  tree_meta_t const& d_tree,
   rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
diff --git a/cpp/tests/io/json_tree.cpp b/cpp/tests/io/json_tree.cpp
index 89156c821c3..59942eecc1d 100644
--- a/cpp/tests/io/json_tree.cpp
+++ b/cpp/tests/io/json_tree.cpp
@@ -735,6 +735,26 @@ TEST_F(JsonTest, TreeRepresentation3)
   if (std::getenv("NJP_DEBUG_DUMP") != nullptr) { print_tree_representation(input, cpu_tree); }
 }
 
+TEST_F(JsonTest, TreeRepresentationError)
+{
+  auto const stream       = cudf::get_default_stream();
+  std::string const input = R"([ {}, }{])";
+  // Prepare input & output buffers
+  cudf::string_scalar const d_scalar(input, true, stream);
+  auto const d_input = cudf::device_span<cuio_json::SymbolT const>{
+    d_scalar.data(), static_cast<size_t>(d_scalar.size())};
+  cudf::io::json_reader_options const options{};
+
+  // Parse the JSON and get the token stream
+  const auto [tokens_gpu, token_indices_gpu] =
+    cudf::io::json::detail::get_token_stream(d_input, options, stream);
+
+  // Get the JSON's tree representation
+  CUDF_EXPECT_THROW_MESSAGE(
+    cuio_json::detail::get_tree_representation(tokens_gpu, token_indices_gpu, stream),
+    "JSON Parser encountered an invalid format at location 6");
+}
+
 /**
  * @brief Test fixture for parametrized JSON tree traversal tests
  */

From 7620fb1cb1f0a0024fccce404a0e9b5fe0694f47 Mon Sep 17 00:00:00 2001
From: "Richard (Rick) Zamora" <rzamora217@gmail.com>
Date: Fri, 28 Oct 2022 13:59:15 -0500
Subject: [PATCH 095/202] Add method argument to DataFrame.quantile (#11957)

Adds a `method` argument to `DataFrame.quantile` to match pandas behavior. Also deprecates `DataFrame.quantiles` (with a `FutureWarning` informing the user of the `method` argument).

Closes https://github.com/rapidsai/cudf/issues/11572

Authors:
  - Richard (Rick) Zamora (https://github.com/rjzamora)
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Ashwin Srinath (https://github.com/shwina)
  - Bradley Dice (https://github.com/bdice)
  - Matthew Roeschke (https://github.com/mroeschke)
  - Lawrence Mitchell (https://github.com/wence-)

URL: https://github.com/rapidsai/cudf/pull/11957
---
 python/cudf/cudf/_lib/quantiles.pyx      |  20 +++--
 python/cudf/cudf/core/dataframe.py       | 107 +++++++++++++----------
 python/cudf/cudf/core/frame.py           |   4 +-
 python/cudf/cudf/tests/test_quantiles.py |   6 +-
 python/dask_cudf/dask_cudf/sorting.py    |   7 +-
 5 files changed, 83 insertions(+), 61 deletions(-)

diff --git a/python/cudf/cudf/_lib/quantiles.pyx b/python/cudf/cudf/_lib/quantiles.pyx
index f65c29a55a8..6cf2d22f978 100644
--- a/python/cudf/cudf/_lib/quantiles.pyx
+++ b/python/cudf/cudf/_lib/quantiles.pyx
@@ -20,7 +20,7 @@ from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.quantiles cimport (
     quantile as cpp_quantile,
-    quantiles as cpp_quantiles,
+    quantiles as cpp_quantile_table,
 )
 from cudf._lib.cpp.table.table cimport table
 from cudf._lib.cpp.table.table_view cimport table_view
@@ -74,12 +74,14 @@ def quantile(
     return Column.from_unique_ptr(move(c_result))
 
 
-def quantiles(list source_columns,
-              vector[double] q,
-              object interp,
-              object is_input_sorted,
-              list column_order,
-              list null_precedence):
+def quantile_table(
+    list source_columns,
+    vector[double] q,
+    object interp,
+    object is_input_sorted,
+    list column_order,
+    list null_precedence,
+):
     cdef table_view c_input = table_view_from_columns(source_columns)
     cdef vector[double] c_q = q
     cdef interpolation c_interp = <interpolation>(
@@ -108,13 +110,13 @@ def quantiles(list source_columns,
 
     with nogil:
         c_result = move(
-            cpp_quantiles(
+            cpp_quantile_table(
                 c_input,
                 c_q,
                 c_interp,
                 c_is_input_sorted,
                 c_column_order,
-                c_null_precedence
+                c_null_precedence,
             )
         )
 
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 02c5542a88a..92ca5148c1e 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -5195,9 +5195,10 @@ def quantile(
         q=0.5,
         axis=0,
         numeric_only=True,
-        interpolation="linear",
+        interpolation=None,
         columns=None,
         exact=True,
+        method="single",
     ):
         """
         Return values at the given quantile.
@@ -5214,11 +5215,16 @@ def quantile(
         interpolation : {`linear`, `lower`, `higher`, `midpoint`, `nearest`}
             This parameter specifies the interpolation method to use,
             when the desired quantile lies between two data points i and j.
-            Default ``linear``.
+            Default is ``linear`` for ``method="single"``, and ``nearest``
+            for ``method="table"``.
         columns : list of str
             List of column names to include.
         exact : boolean
             Whether to use approximate or exact quantile algorithm.
+        method : {`single`, `table`}, default `single`
+            Whether to compute quantiles per-column ('single') or over all
+            columns ('table'). When 'table', the only allowed interpolation
+            methods are 'nearest', 'lower', and 'higher'.
 
         Returns
         -------
@@ -5271,39 +5277,62 @@ def quantile(
         if columns is None:
             columns = data_df._data.names
 
-        # Ensure that qs is non-scalar so that we always get a column back.
-        qs = [q] if is_scalar(q) else q
-        result = {}
-        for k in data_df._data.names:
-            if k in columns:
-                ser = data_df[k]
-                res = ser.quantile(
-                    qs,
-                    interpolation=interpolation,
-                    exact=exact,
-                    quant_index=False,
-                )._column
-                if len(res) == 0:
-                    res = column.column_empty_like(
-                        qs, dtype=ser.dtype, masked=True, newsize=len(qs)
-                    )
-                result[k] = res
+        if isinstance(q, numbers.Number):
+            q_is_number = True
+            qs = [float(q)]
+        elif pd.api.types.is_list_like(q):
+            q_is_number = False
+            qs = q
+        else:
+            msg = "`q` must be either a single element or list"
+            raise TypeError(msg)
 
-        result = DataFrame._from_data(result)
-        if isinstance(q, numbers.Number) and numeric_only:
-            result = result.fillna(np.nan).iloc[0]
-            result.index = data_df._data.to_pandas_index()
-            result.name = q
-            return result
+        if method == "table":
+            interpolation = interpolation or "nearest"
+            result = self._quantile_table(qs, interpolation.upper())
+
+            if q_is_number:
+                result = result.transpose()
+                return Series(
+                    data=result._columns[0], index=result.index, name=q
+                )
         else:
-            result.index = list(map(float, qs))
-            return result
+            # qs is list-like here, so Series.quantile always returns a column.
+            interpolation = interpolation or "linear"
+            result = {}
+            for k in data_df._data.names:
+                if k in columns:
+                    ser = data_df[k]
+                    res = ser.quantile(
+                        qs,
+                        interpolation=interpolation,
+                        exact=exact,
+                        quant_index=False,
+                    )._column
+                    if len(res) == 0:
+                        res = column.column_empty_like(
+                            qs, dtype=ser.dtype, masked=True, newsize=len(qs)
+                        )
+                    result[k] = res
+            result = DataFrame._from_data(result)
+
+            if q_is_number and numeric_only:
+                result = result.fillna(np.nan).iloc[0]
+                result.index = data_df.keys()
+                result.name = q
+                return result
+
+        result.index = list(map(float, qs))
+        return result
 
     @_cudf_nvtx_annotate
     def quantiles(self, q=0.5, interpolation="nearest"):
         """
         Return values at the given quantile.
 
+        This API is now deprecated. Please use ``DataFrame.quantile``
+        with ``method='table'``.
+
         Parameters
         ----------
         q : float or array-like
@@ -5317,25 +5346,13 @@ def quantiles(self, q=0.5, interpolation="nearest"):
         -------
         DataFrame
         """
-        if isinstance(q, numbers.Number):
-            q_is_number = True
-            q = [float(q)]
-        elif pd.api.types.is_list_like(q):
-            q_is_number = False
-        else:
-            msg = "`q` must be either a single element or list"
-            raise TypeError(msg)
-
-        result = self._quantiles(q, interpolation.upper())
+        warnings.warn(
+            "DataFrame.quantiles is now deprecated. "
+            "Please use DataFrame.quantile with `method='table'`.",
+            FutureWarning,
+        )
 
-        if q_is_number:
-            result = result.transpose()
-            return Series(
-                data=result._columns[0], index=result.index, name=q[0]
-            )
-        else:
-            result.index = as_index(q)
-            return result
+        return self.quantile(q=q, interpolation=interpolation, method="table")
 
     @_cudf_nvtx_annotate
     def isin(self, values):
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 9e539ee157b..12c53ae258d 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -953,7 +953,7 @@ def _drop_na_columns(self, how="any", subset=None, thresh=None):
         return self[out_cols]
 
     @_cudf_nvtx_annotate
-    def _quantiles(
+    def _quantile_table(
         self,
         q,
         interpolation="LINEAR",
@@ -972,7 +972,7 @@ def _quantiles(
         ]
 
         return self._from_columns_like_self(
-            libcudf.quantiles.quantiles(
+            libcudf.quantiles.quantile_table(
                 [*self._columns],
                 q,
                 interpolation,
diff --git a/python/cudf/cudf/tests/test_quantiles.py b/python/cudf/cudf/tests/test_quantiles.py
index c4e3f690d13..72b36d8a1a6 100644
--- a/python/cudf/cudf/tests/test_quantiles.py
+++ b/python/cudf/cudf/tests/test_quantiles.py
@@ -16,7 +16,7 @@ def test_single_q():
     gdf = cudf.from_pandas(pdf)
 
     pdf_q = pdf.quantile(q, interpolation="nearest")
-    gdf_q = gdf.quantiles(q, interpolation="nearest")
+    gdf_q = gdf.quantile(q, interpolation="nearest", method="table")
 
     assert_eq(pdf_q, gdf_q, check_index_type=False)
 
@@ -28,7 +28,7 @@ def test_with_index():
     gdf = cudf.from_pandas(pdf)
 
     pdf_q = pdf.quantile(q, interpolation="nearest")
-    gdf_q = gdf.quantiles(q, interpolation="nearest")
+    gdf_q = gdf.quantile(q, interpolation="nearest", method="table")
 
     assert_eq(pdf_q, gdf_q, check_index_type=False)
 
@@ -48,7 +48,7 @@ def test_with_multiindex():
     gdf = cudf.from_pandas(pdf)
 
     pdf_q = pdf.quantile(q, interpolation="nearest")
-    gdf_q = gdf.quantiles(q, interpolation="nearest")
+    gdf_q = gdf.quantile(q, interpolation="nearest", method="table")
 
     assert_eq(pdf_q, gdf_q, check_index_type=False)
 
diff --git a/python/dask_cudf/dask_cudf/sorting.py b/python/dask_cudf/dask_cudf/sorting.py
index 4c2372393e5..0f2dc0d4efc 100644
--- a/python/dask_cudf/dask_cudf/sorting.py
+++ b/python/dask_cudf/dask_cudf/sorting.py
@@ -48,7 +48,10 @@ def _quantile(a, q):
     n = len(a)
     if not len(a):
         return None, n
-    return (a.quantiles(q=q.tolist(), interpolation="nearest"), n)
+    return (
+        a.quantile(q=q.tolist(), interpolation="nearest", method="table"),
+        n,
+    )
 
 
 @_dask_cudf_nvtx_annotate
@@ -133,7 +136,7 @@ def _approximate_quantile(df, q):
     final_type = df._meta._constructor
 
     # Create metadata
-    meta = df._meta_nonempty.quantiles(q=q)
+    meta = df._meta_nonempty.quantile(q=q, method="table")
 
     # Define final action (create df with quantiles as index)
     def finalize_tsk(tsk):

From 06031670a3bf3a6a715721cb208ea6ea619cab04 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Fri, 28 Oct 2022 15:32:48 -0700
Subject: [PATCH 096/202] Add cython-lint to pre-commit checks. (#12020)

Adds `cython-lint` (https://github.com/MarcoGorelli/cython-lint) to the list of pre-commit checks. It is most similar to flake8 but with support for Cython syntax. The rule set it enforces is fairly short; it mostly helps identify unused imports in Cython files.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/12020
---
 .pre-commit-config.yaml                       |  4 +++
 python/cudf/cudf/_lib/aggregation.pyx         |  9 +-----
 python/cudf/cudf/_lib/binaryop.pyx            |  3 --
 python/cudf/cudf/_lib/column.pyx              | 21 ++------------
 python/cudf/cudf/_lib/copying.pyx             |  5 +---
 python/cudf/cudf/_lib/csv.pyx                 | 12 ++------
 python/cudf/cudf/_lib/expressions.pyx         |  1 -
 python/cudf/cudf/_lib/filling.pyx             | 10 +------
 python/cudf/cudf/_lib/groupby.pyx             | 16 +----------
 python/cudf/cudf/_lib/hash.pyx                |  2 --
 python/cudf/cudf/_lib/interop.pyx             |  5 ----
 python/cudf/cudf/_lib/io/utils.pyx            |  4 ---
 python/cudf/cudf/_lib/join.pyx                |  7 -----
 python/cudf/cudf/_lib/json.pyx                |  4 +--
 python/cudf/cudf/_lib/labeling.pyx            | 10 +------
 python/cudf/cudf/_lib/lists.pyx               | 12 ++------
 python/cudf/cudf/_lib/merge.pyx               |  1 -
 .../cudf/cudf/_lib/nvtext/edit_distance.pyx   |  3 +-
 .../cudf/_lib/nvtext/subword_tokenize.pyx     |  5 ++--
 python/cudf/cudf/_lib/nvtext/tokenize.pyx     |  3 +-
 python/cudf/cudf/_lib/orc.pyx                 | 13 ++-------
 python/cudf/cudf/_lib/parquet.pyx             | 11 ++------
 python/cudf/cudf/_lib/partitioning.pyx        |  1 -
 python/cudf/cudf/_lib/quantiles.pyx           |  9 +-----
 python/cudf/cudf/_lib/reduce.pyx              | 28 ++++++-------------
 python/cudf/cudf/_lib/reshape.pyx             |  1 -
 python/cudf/cudf/_lib/rolling.pyx             |  4 ---
 python/cudf/cudf/_lib/scalar.pyx              |  7 +----
 python/cudf/cudf/_lib/sort.pyx                |  7 +----
 python/cudf/cudf/_lib/stream_compaction.pyx   |  9 +-----
 python/cudf/cudf/_lib/string_casting.pyx      |  8 +-----
 python/cudf/cudf/_lib/strings/combine.pyx     |  2 --
 .../strings/convert/convert_fixed_point.pyx   | 17 ++---------
 .../_lib/strings/convert/convert_floats.pyx   |  3 +-
 .../_lib/strings/convert/convert_integers.pyx |  3 +-
 python/cudf/cudf/_lib/strings/extract.pyx     |  2 --
 python/cudf/cudf/_lib/strings/findall.pyx     |  1 -
 python/cudf/cudf/_lib/strings/json.pyx        |  2 --
 python/cudf/cudf/_lib/strings/padding.pyx     |  2 --
 python/cudf/cudf/_lib/strings/replace.pyx     |  3 +-
 .../cudf/_lib/strings/split/partition.pyx     |  6 +---
 python/cudf/cudf/_lib/strings/split/split.pyx |  1 -
 python/cudf/cudf/_lib/strings/strip.pyx       |  2 --
 python/cudf/cudf/_lib/text.pyx                |  5 +---
 python/cudf/cudf/_lib/transform.pyx           |  4 ---
 python/cudf/cudf/_lib/types.pyx               |  1 -
 python/cudf/cudf/_lib/unary.pyx               | 21 ++++----------
 python/cudf/cudf/_lib/utils.pyx               |  1 -
 .../strings_udf/_lib/cudf_jit_udf.pyx         |  2 +-
 49 files changed, 51 insertions(+), 262 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b4e57947cf9..06a69719517 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -26,6 +26,10 @@ repos:
                 types: [file]
                 types_or: [python, cython]
                 additional_dependencies: ["flake8-force"]
+      - repo: https://github.com/MarcoGorelli/cython-lint
+        rev: v0.1.10
+        hooks:
+              - id: cython-lint
       - repo: https://github.com/pre-commit/mirrors-mypy
         rev: 'v0.971'
         hooks:
diff --git a/python/cudf/cudf/_lib/aggregation.pyx b/python/cudf/cudf/_lib/aggregation.pyx
index 84dd9c3a576..72c5e288f0b 100644
--- a/python/cudf/cudf/_lib/aggregation.pyx
+++ b/python/cudf/cudf/_lib/aggregation.pyx
@@ -2,20 +2,13 @@
 
 from enum import Enum, IntEnum
 
-import numba
-import numpy as np
 import pandas as pd
 
-from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
 from libcpp.utility cimport move
 from libcpp.vector cimport vector
 
-from cudf._lib.types import (
-    LIBCUDF_TO_SUPPORTED_NUMPY_TYPES,
-    SUPPORTED_NUMPY_TO_LIBCUDF_TYPES,
-    NullHandling,
-)
+from cudf._lib.types import SUPPORTED_NUMPY_TO_LIBCUDF_TYPES, NullHandling
 from cudf.utils import cudautils
 
 from cudf._lib.types cimport (
diff --git a/python/cudf/cudf/_lib/binaryop.pyx b/python/cudf/cudf/_lib/binaryop.pyx
index 8728437541d..995fdc7e315 100644
--- a/python/cudf/cudf/_lib/binaryop.pyx
+++ b/python/cudf/cudf/_lib/binaryop.pyx
@@ -2,8 +2,6 @@
 
 from enum import IntEnum
 
-import numpy as np
-
 from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
 from libcpp.utility cimport move
@@ -11,7 +9,6 @@ from libcpp.utility cimport move
 from cudf._lib.binaryop cimport underlying_type_t_binary_operator
 from cudf._lib.column cimport Column
 
-from cudf._lib.replace import replace_nulls
 from cudf._lib.scalar import as_device_scalar
 
 from cudf._lib.scalar cimport DeviceScalar
diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx
index 78125c027dd..1e7f0b175bc 100644
--- a/python/cudf/cudf/_lib/column.pyx
+++ b/python/cudf/cudf/_lib/column.pyx
@@ -2,39 +2,23 @@
 
 import cupy as cp
 import numpy as np
-import pandas as pd
 
 import rmm
 
 import cudf
 import cudf._lib as libcudf
-from cudf.api.types import is_categorical_dtype, is_list_dtype, is_struct_dtype
+from cudf.api.types import is_categorical_dtype
 from cudf.core.buffer import Buffer, DeviceBufferLike, as_device_buffer_like
 
 from cpython.buffer cimport PyObject_CheckBuffer
 from libc.stdint cimport uintptr_t
-from libcpp cimport bool
 from libcpp.memory cimport make_unique, unique_ptr
-from libcpp.pair cimport pair
 from libcpp.utility cimport move
 from libcpp.vector cimport vector
 
 from rmm._lib.device_buffer cimport DeviceBuffer
 
-from cudf._lib.cpp.strings.convert.convert_integers cimport (
-    from_integers as cpp_from_integers,
-)
-
-from cudf._lib.types import (
-    LIBCUDF_TO_SUPPORTED_NUMPY_TYPES,
-    SUPPORTED_NUMPY_TO_LIBCUDF_TYPES,
-)
-
-from cudf._lib.types cimport (
-    dtype_from_column_view,
-    dtype_to_data_type,
-    underlying_type_t_type_id,
-)
+from cudf._lib.types cimport dtype_from_column_view, dtype_to_data_type
 
 from cudf._lib.null_mask import bitmask_allocation_size_bytes
 
@@ -46,7 +30,6 @@ from cudf._lib.cpp.column.column_factories cimport (
     make_numeric_column,
 )
 from cudf._lib.cpp.column.column_view cimport column_view
-from cudf._lib.cpp.lists.lists_column_view cimport lists_column_view
 from cudf._lib.cpp.scalar.scalar cimport scalar
 from cudf._lib.scalar cimport DeviceScalar
 
diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx
index a9cfbbbe223..26ec2fbcdfc 100644
--- a/python/cudf/cudf/_lib/copying.pyx
+++ b/python/cudf/cudf/_lib/copying.pyx
@@ -3,9 +3,7 @@
 import pickle
 import warnings
 
-import pandas as pd
-
-from libc.stdint cimport int32_t, int64_t, uint8_t, uintptr_t
+from libc.stdint cimport int32_t, uint8_t, uintptr_t
 from libcpp cimport bool
 from libcpp.memory cimport make_shared, make_unique, shared_ptr, unique_ptr
 from libcpp.utility cimport move
@@ -42,7 +40,6 @@ from cudf._lib.utils cimport (
     columns_from_table_view,
     columns_from_unique_ptr,
     data_from_table_view,
-    data_from_unique_ptr,
     table_view_from_columns,
 )
 
diff --git a/python/cudf/cudf/_lib/csv.pyx b/python/cudf/cudf/_lib/csv.pyx
index f1a75baa951..920d423adc5 100644
--- a/python/cudf/cudf/_lib/csv.pyx
+++ b/python/cudf/cudf/_lib/csv.pyx
@@ -2,13 +2,13 @@
 
 from libcpp cimport bool
 from libcpp.map cimport map
-from libcpp.memory cimport make_unique, unique_ptr
+from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
 from libcpp.utility cimport move
 from libcpp.vector cimport vector
 
 cimport cudf._lib.cpp.types as libcudf_types
-from cudf._lib.cpp.types cimport data_type, type_id
+from cudf._lib.cpp.types cimport data_type
 from cudf._lib.io.datasource cimport Datasource, NativeFileDatasource
 from cudf._lib.types cimport dtype_to_data_type
 
@@ -45,11 +45,7 @@ from cudf._lib.cpp.io.types cimport (
 )
 from cudf._lib.cpp.table.table_view cimport table_view
 from cudf._lib.io.utils cimport make_sink_info, make_source_info
-from cudf._lib.utils cimport (
-    data_from_unique_ptr,
-    table_view_from_columns,
-    table_view_from_table,
-)
+from cudf._lib.utils cimport data_from_unique_ptr, table_view_from_table
 
 from pyarrow.lib import NativeFile
 
@@ -121,8 +117,6 @@ cdef csv_reader_options make_csv_reader_options(
 ) except *:
     cdef source_info c_source_info = make_source_info([datasource])
     cdef compression_type c_compression
-    cdef size_type c_header
-    cdef string c_prefix
     cdef vector[string] c_names
     cdef size_t c_byte_range_offset = (
         byte_range[0] if byte_range is not None else 0
diff --git a/python/cudf/cudf/_lib/expressions.pyx b/python/cudf/cudf/_lib/expressions.pyx
index f069bcdbe73..269318240b2 100644
--- a/python/cudf/cudf/_lib/expressions.pyx
+++ b/python/cudf/cudf/_lib/expressions.pyx
@@ -5,7 +5,6 @@ from enum import Enum
 from cython.operator cimport dereference
 from libc.stdint cimport int64_t
 from libcpp.memory cimport make_unique, unique_ptr
-from libcpp.utility cimport move
 
 from cudf._lib.cpp cimport expressions as libcudf_exp
 from cudf._lib.cpp.types cimport size_type
diff --git a/python/cudf/cudf/_lib/filling.pyx b/python/cudf/cudf/_lib/filling.pyx
index 592d56158a1..891da82821c 100644
--- a/python/cudf/cudf/_lib/filling.pyx
+++ b/python/cudf/cudf/_lib/filling.pyx
@@ -1,8 +1,5 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
-import numpy as np
-
-from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
@@ -15,11 +12,7 @@ from cudf._lib.cpp.table.table cimport table
 from cudf._lib.cpp.table.table_view cimport table_view
 from cudf._lib.cpp.types cimport size_type
 from cudf._lib.scalar cimport DeviceScalar
-from cudf._lib.utils cimport (
-    columns_from_unique_ptr,
-    data_from_unique_ptr,
-    table_view_from_columns,
-)
+from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
 
 
 def fill_in_place(Column destination, int begin, int end, DeviceScalar value):
@@ -64,7 +57,6 @@ def repeat(list inp, object count):
 def _repeat_via_column(list inp, Column count):
     cdef table_view c_inp = table_view_from_columns(inp)
     cdef column_view c_count = count.view()
-    cdef bool c_check_count = False
     cdef unique_ptr[table] c_result
 
     with nogil:
diff --git a/python/cudf/cudf/_lib/groupby.pyx b/python/cudf/cudf/_lib/groupby.pyx
index 08a1d74f80f..e6fbefaeee9 100644
--- a/python/cudf/cudf/_lib/groupby.pyx
+++ b/python/cudf/cudf/_lib/groupby.pyx
@@ -1,12 +1,7 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
-from collections import defaultdict
-
-import numpy as np
 from pandas.core.groupby.groupby import DataError
 
-import rmm
-
 from cudf.api.types import (
     is_categorical_dtype,
     is_decimal_dtype,
@@ -22,16 +17,9 @@ from libcpp.pair cimport pair
 from libcpp.utility cimport move
 from libcpp.vector cimport vector
 
-import cudf
-
 from cudf._lib.column cimport Column
 from cudf._lib.scalar cimport DeviceScalar
-from cudf._lib.utils cimport (
-    columns_from_unique_ptr,
-    data_from_unique_ptr,
-    table_view_from_columns,
-    table_view_from_table,
-)
+from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
 
 from cudf._lib.scalar import as_device_scalar
 
@@ -44,13 +32,11 @@ from cudf._lib.aggregation cimport (
     make_groupby_scan_aggregation,
 )
 from cudf._lib.cpp.column.column cimport column
-from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.libcpp.functional cimport reference_wrapper
 from cudf._lib.cpp.replace cimport replace_policy
 from cudf._lib.cpp.scalar.scalar cimport scalar
 from cudf._lib.cpp.table.table cimport table, table_view
 from cudf._lib.cpp.types cimport size_type
-from cudf._lib.cpp.utilities.host_span cimport host_span
 
 # The sets below define the possible aggregations that can be performed on
 # different dtypes. These strings must be elements of the AggregationKind enum.
diff --git a/python/cudf/cudf/_lib/hash.pyx b/python/cudf/cudf/_lib/hash.pyx
index 1eba3a2f6b5..03033cd1a7e 100644
--- a/python/cudf/cudf/_lib/hash.pyx
+++ b/python/cudf/cudf/_lib/hash.pyx
@@ -1,7 +1,5 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
-from libc.stdint cimport uint32_t
-from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.pair cimport pair
 from libcpp.utility cimport move
diff --git a/python/cudf/cudf/_lib/interop.pyx b/python/cudf/cudf/_lib/interop.pyx
index ee5ce165f95..92840561563 100644
--- a/python/cudf/cudf/_lib/interop.pyx
+++ b/python/cudf/cudf/_lib/interop.pyx
@@ -1,11 +1,7 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
-import cudf
-
 from cpython cimport pycapsule
-from libcpp cimport bool
 from libcpp.memory cimport shared_ptr, unique_ptr
-from libcpp.string cimport string
 from libcpp.utility cimport move
 from libcpp.vector cimport vector
 from pyarrow.lib cimport CTable, pyarrow_unwrap_table, pyarrow_wrap_table
@@ -18,7 +14,6 @@ from cudf._lib.cpp.interop cimport (
     to_arrow as cpp_to_arrow,
     to_dlpack as cpp_to_dlpack,
 )
-from cudf._lib.cpp.io.types cimport column_in_metadata
 from cudf._lib.cpp.table.table cimport table
 from cudf._lib.cpp.table.table_view cimport table_view
 from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
diff --git a/python/cudf/cudf/_lib/io/utils.pyx b/python/cudf/cudf/_lib/io/utils.pyx
index 21939ff39b6..7dbe395be79 100644
--- a/python/cudf/cudf/_lib/io/utils.pyx
+++ b/python/cudf/cudf/_lib/io/utils.pyx
@@ -2,9 +2,7 @@
 
 from cpython.buffer cimport PyBUF_READ
 from cpython.memoryview cimport PyMemoryView_FromMemory
-from libcpp.map cimport map
 from libcpp.memory cimport unique_ptr
-from libcpp.pair cimport pair
 from libcpp.string cimport string
 from libcpp.utility cimport move
 from libcpp.vector cimport vector
@@ -15,7 +13,6 @@ from cudf._lib.cpp.io.types cimport (
     data_sink,
     datasource,
     host_buffer,
-    io_type,
     sink_info,
     source_info,
 )
@@ -26,7 +23,6 @@ import errno
 import io
 import os
 
-import cudf
 from cudf.api.types import is_struct_dtype
 
 
diff --git a/python/cudf/cudf/_lib/join.pyx b/python/cudf/cudf/_lib/join.pyx
index 1baef266dab..ff5f6e1afcc 100644
--- a/python/cudf/cudf/_lib/join.pyx
+++ b/python/cudf/cudf/_lib/join.pyx
@@ -1,19 +1,12 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
-from itertools import chain
-
-import cudf
-
-from libcpp cimport bool
 from libcpp.memory cimport make_unique, unique_ptr
 from libcpp.pair cimport pair
 from libcpp.utility cimport move
-from libcpp.vector cimport vector
 
 cimport cudf._lib.cpp.join as cpp_join
 from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column cimport column
-from cudf._lib.cpp.table.table cimport table
 from cudf._lib.cpp.table.table_view cimport table_view
 from cudf._lib.cpp.types cimport data_type, size_type, type_id
 from cudf._lib.utils cimport table_view_from_columns
diff --git a/python/cudf/cudf/_lib/json.pyx b/python/cudf/cudf/_lib/json.pyx
index 5efe40ed2e9..533329bd856 100644
--- a/python/cudf/cudf/_lib/json.pyx
+++ b/python/cudf/cudf/_lib/json.pyx
@@ -2,7 +2,6 @@
 
 # cython: boundscheck = False
 
-
 import io
 import os
 from collections import abc
@@ -16,13 +15,12 @@ from libcpp.utility cimport move
 from libcpp.vector cimport vector
 
 cimport cudf._lib.cpp.io.types as cudf_io_types
-cimport cudf._lib.cpp.types as libcudf_types
 from cudf._lib.cpp.io.json cimport (
     json_reader_options,
     read_json as libcudf_read_json,
     schema_element,
 )
-from cudf._lib.cpp.types cimport data_type, size_type, type_id
+from cudf._lib.cpp.types cimport data_type, size_type
 from cudf._lib.io.utils cimport make_source_info, update_struct_field_names
 from cudf._lib.types cimport dtype_to_data_type
 from cudf._lib.utils cimport data_from_unique_ptr
diff --git a/python/cudf/cudf/_lib/labeling.pyx b/python/cudf/cudf/_lib/labeling.pyx
index 088942064a8..ed5033c08a5 100644
--- a/python/cudf/cudf/_lib/labeling.pyx
+++ b/python/cudf/cudf/_lib/labeling.pyx
@@ -1,18 +1,10 @@
-# Copyright (c) 2021, NVIDIA CORPORATION.
+# Copyright (c) 2021-2022, NVIDIA CORPORATION.
 
-from enum import IntEnum
-
-import numpy as np
-
-from libc.stdint cimport uint32_t
 from libcpp cimport bool as cbool
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
 from cudf._lib.column cimport Column
-
-from cudf._lib.replace import replace_nulls
-
 from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.labeling cimport inclusive, label_bins as cpp_label_bins
diff --git a/python/cudf/cudf/_lib/lists.pyx b/python/cudf/cudf/_lib/lists.pyx
index 581207c97a5..8a7b4be3be9 100644
--- a/python/cudf/cudf/_lib/lists.pyx
+++ b/python/cudf/cudf/_lib/lists.pyx
@@ -12,10 +12,12 @@ from cudf._lib.cpp.lists.combine cimport (
     concatenate_null_policy,
     concatenate_rows as cpp_concatenate_rows,
 )
+from cudf._lib.cpp.lists.contains cimport contains, index_of as cpp_index_of
 from cudf._lib.cpp.lists.count_elements cimport (
     count_elements as cpp_count_elements,
 )
 from cudf._lib.cpp.lists.explode cimport explode_outer as cpp_explode_outer
+from cudf._lib.cpp.lists.extract cimport extract_list_element
 from cudf._lib.cpp.lists.lists_column_view cimport lists_column_view
 from cudf._lib.cpp.lists.sorting cimport sort_lists as cpp_sort_lists
 from cudf._lib.cpp.lists.stream_compaction cimport distinct as cpp_distinct
@@ -26,20 +28,10 @@ from cudf._lib.cpp.types cimport (
     nan_equality,
     null_equality,
     null_order,
-    null_policy,
     order,
     size_type,
 )
 from cudf._lib.scalar cimport DeviceScalar
-from cudf._lib.types cimport (
-    underlying_type_t_null_order,
-    underlying_type_t_order,
-)
-
-from cudf.core.dtypes import ListDtype
-
-from cudf._lib.cpp.lists.contains cimport contains, index_of as cpp_index_of
-from cudf._lib.cpp.lists.extract cimport extract_list_element
 from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
 
 
diff --git a/python/cudf/cudf/_lib/merge.pyx b/python/cudf/cudf/_lib/merge.pyx
index dae2c466266..935d8c69adc 100644
--- a/python/cudf/cudf/_lib/merge.pyx
+++ b/python/cudf/cudf/_lib/merge.pyx
@@ -6,7 +6,6 @@ from libcpp.utility cimport move
 from libcpp.vector cimport vector
 
 cimport cudf._lib.cpp.types as libcudf_types
-from cudf._lib.column cimport Column
 from cudf._lib.cpp.merge cimport merge as cpp_merge
 from cudf._lib.cpp.table.table cimport table
 from cudf._lib.cpp.table.table_view cimport table_view
diff --git a/python/cudf/cudf/_lib/nvtext/edit_distance.pyx b/python/cudf/cudf/_lib/nvtext/edit_distance.pyx
index f1e15570e9f..c8dc6edd6e2 100644
--- a/python/cudf/cudf/_lib/nvtext/edit_distance.pyx
+++ b/python/cudf/cudf/_lib/nvtext/edit_distance.pyx
@@ -1,6 +1,5 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
-from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
diff --git a/python/cudf/cudf/_lib/nvtext/subword_tokenize.pyx b/python/cudf/cudf/_lib/nvtext/subword_tokenize.pyx
index 426744ee46c..dd8bbd6d7b6 100644
--- a/python/cudf/cudf/_lib/nvtext/subword_tokenize.pyx
+++ b/python/cudf/cudf/_lib/nvtext/subword_tokenize.pyx
@@ -1,13 +1,12 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
-from libc.stdint cimport uint32_t, uintptr_t
+from libc.stdint cimport uint32_t
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
 from libcpp.utility cimport move
 
 from cudf._lib.column cimport Column
-from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.nvtext.subword_tokenize cimport (
     hashed_vocabulary as cpp_hashed_vocabulary,
diff --git a/python/cudf/cudf/_lib/nvtext/tokenize.pyx b/python/cudf/cudf/_lib/nvtext/tokenize.pyx
index 5fc852c2ab0..00f63b9cf7c 100644
--- a/python/cudf/cudf/_lib/nvtext/tokenize.pyx
+++ b/python/cudf/cudf/_lib/nvtext/tokenize.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2020, NVIDIA CORPORATION.
+# Copyright (c) 2018-2022, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
@@ -13,7 +13,6 @@ from cudf._lib.cpp.nvtext.tokenize cimport (
     tokenize as cpp_tokenize,
 )
 from cudf._lib.cpp.scalar.scalar cimport string_scalar
-from cudf._lib.cpp.types cimport size_type
 from cudf._lib.scalar cimport DeviceScalar
 
 
diff --git a/python/cudf/cudf/_lib/orc.pyx b/python/cudf/cudf/_lib/orc.pyx
index be7b29da515..f57e4e8f281 100644
--- a/python/cudf/cudf/_lib/orc.pyx
+++ b/python/cudf/cudf/_lib/orc.pyx
@@ -8,6 +8,7 @@ from libcpp.memory cimport make_unique, unique_ptr
 from libcpp.string cimport string
 from libcpp.utility cimport move
 from libcpp.vector cimport vector
+
 from collections import OrderedDict
 
 cimport cudf._lib.cpp.lists.lists_column_view as cpp_lists_column_view
@@ -19,7 +20,6 @@ except ImportError:
 
 cimport cudf._lib.cpp.io.types as cudf_io_types
 from cudf._lib.column cimport Column
-from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.io.orc cimport (
     chunked_orc_writer_options,
     orc_chunked_writer,
@@ -34,7 +34,6 @@ from cudf._lib.cpp.io.orc_metadata cimport (
 )
 from cudf._lib.cpp.io.types cimport (
     column_in_metadata,
-    column_name_info,
     compression_type,
     data_sink,
     sink_info,
@@ -49,20 +48,12 @@ from cudf._lib.io.utils cimport (
     make_sink_info,
     make_source_info,
     update_column_struct_field_names,
-    update_struct_field_names,
 )
 
 from cudf._lib.types import SUPPORTED_NUMPY_TO_LIBCUDF_TYPES
 
 from cudf._lib.types cimport underlying_type_t_type_id
-
-import numpy as np
-
-from cudf._lib.utils cimport (
-    data_from_unique_ptr,
-    get_column_names,
-    table_view_from_table,
-)
+from cudf._lib.utils cimport data_from_unique_ptr, table_view_from_table
 
 from pyarrow.lib import NativeFile
 
diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx
index 891f259a828..6de84ce90c3 100644
--- a/python/cudf/cudf/_lib/parquet.pyx
+++ b/python/cudf/cudf/_lib/parquet.pyx
@@ -2,9 +2,7 @@
 
 # cython: boundscheck = False
 
-import errno
 import io
-import os
 
 import pyarrow as pa
 
@@ -20,20 +18,17 @@ import numpy as np
 from cython.operator cimport dereference
 
 from cudf.api.types import (
-    is_categorical_dtype,
     is_decimal_dtype,
     is_list_dtype,
     is_list_like,
     is_struct_dtype,
 )
-from cudf.utils.dtypes import np_to_pa_dtype
 
-from cudf._lib.utils cimport data_from_unique_ptr, get_column_names
+from cudf._lib.utils cimport data_from_unique_ptr
 
 from cudf._lib.utils import _index_level_name, generate_pandas_metadata
 
 from libc.stdint cimport uint8_t
-from libc.stdlib cimport free
 from libcpp cimport bool
 from libcpp.map cimport map
 from libcpp.memory cimport make_unique, unique_ptr
@@ -47,7 +42,6 @@ cimport cudf._lib.cpp.types as cudf_types
 from cudf._lib.column cimport Column
 from cudf._lib.cpp.io.parquet cimport (
     chunked_parquet_writer_options,
-    chunked_parquet_writer_options_builder,
     merge_row_group_metadata as parquet_merge_metadata,
     parquet_chunked_writer as cpp_parquet_chunked_writer,
     parquet_reader_options,
@@ -59,9 +53,8 @@ from cudf._lib.cpp.io.types cimport column_in_metadata, table_input_metadata
 from cudf._lib.cpp.table.table cimport table
 from cudf._lib.cpp.table.table_view cimport table_view
 from cudf._lib.cpp.types cimport data_type, size_type
-from cudf._lib.io.datasource cimport Datasource, NativeFileDatasource
+from cudf._lib.io.datasource cimport NativeFileDatasource
 from cudf._lib.io.utils cimport (
-    make_sink_info,
     make_sinks_info,
     make_source_info,
     update_struct_field_names,
diff --git a/python/cudf/cudf/_lib/partitioning.pyx b/python/cudf/cudf/_lib/partitioning.pyx
index f2f5a92aca1..233551c5134 100644
--- a/python/cudf/cudf/_lib/partitioning.pyx
+++ b/python/cudf/cudf/_lib/partitioning.pyx
@@ -1,6 +1,5 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
-from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.pair cimport pair
 from libcpp.utility cimport move
diff --git a/python/cudf/cudf/_lib/quantiles.pyx b/python/cudf/cudf/_lib/quantiles.pyx
index 6cf2d22f978..62706367c4f 100644
--- a/python/cudf/cudf/_lib/quantiles.pyx
+++ b/python/cudf/cudf/_lib/quantiles.pyx
@@ -6,7 +6,6 @@ from libcpp.utility cimport move
 from libcpp.vector cimport vector
 
 from cudf._lib.column cimport Column
-from cudf._lib.scalar cimport DeviceScalar
 from cudf._lib.types cimport (
     underlying_type_t_interpolation,
     underlying_type_t_null_order,
@@ -24,13 +23,7 @@ from cudf._lib.cpp.quantiles cimport (
 )
 from cudf._lib.cpp.table.table cimport table
 from cudf._lib.cpp.table.table_view cimport table_view
-from cudf._lib.cpp.types cimport (
-    interpolation,
-    null_order,
-    order,
-    order_info,
-    sorted,
-)
+from cudf._lib.cpp.types cimport interpolation, null_order, order, sorted
 from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
 
 
diff --git a/python/cudf/cudf/_lib/reduce.pyx b/python/cudf/cudf/_lib/reduce.pyx
index ec338eeee70..e46d724ed9d 100644
--- a/python/cudf/cudf/_lib/reduce.pyx
+++ b/python/cudf/cudf/_lib/reduce.pyx
@@ -3,17 +3,6 @@
 from cython.operator import dereference
 
 import cudf
-from cudf.api.types import is_decimal_dtype
-
-from cudf._lib.column cimport Column
-from cudf._lib.cpp.column.column cimport column
-from cudf._lib.cpp.column.column_view cimport column_view
-from cudf._lib.cpp.reduce cimport cpp_minmax, cpp_reduce, cpp_scan, scan_type
-from cudf._lib.cpp.scalar.scalar cimport scalar
-from cudf._lib.cpp.types cimport data_type, type_id
-from cudf._lib.scalar cimport DeviceScalar
-
-from cudf._lib.types import SUPPORTED_NUMPY_TO_LIBCUDF_TYPES
 
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move, pair
@@ -24,15 +13,14 @@ from cudf._lib.aggregation cimport (
     make_reduce_aggregation,
     make_scan_aggregation,
 )
-from cudf._lib.types cimport (
-    dtype_to_data_type,
-    is_decimal_type_id,
-    underlying_type_t_type_id,
-)
-
-import numpy as np
-
-cimport cudf._lib.cpp.types as libcudf_types
+from cudf._lib.column cimport Column
+from cudf._lib.cpp.column.column cimport column
+from cudf._lib.cpp.column.column_view cimport column_view
+from cudf._lib.cpp.reduce cimport cpp_minmax, cpp_reduce, cpp_scan, scan_type
+from cudf._lib.cpp.scalar.scalar cimport scalar
+from cudf._lib.cpp.types cimport data_type
+from cudf._lib.scalar cimport DeviceScalar
+from cudf._lib.types cimport dtype_to_data_type, is_decimal_type_id
 
 
 def reduce(reduction_op, Column incol, dtype=None, **kwargs):
diff --git a/python/cudf/cudf/_lib/reshape.pyx b/python/cudf/cudf/_lib/reshape.pyx
index 29223947eea..84bad039199 100644
--- a/python/cudf/cudf/_lib/reshape.pyx
+++ b/python/cudf/cudf/_lib/reshape.pyx
@@ -5,7 +5,6 @@ from libcpp.utility cimport move
 
 from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column cimport column
-from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.reshape cimport (
     interleave_columns as cpp_interleave_columns,
     tile as cpp_tile,
diff --git a/python/cudf/cudf/_lib/rolling.pyx b/python/cudf/cudf/_lib/rolling.pyx
index d12b096bc08..7b0da6957a0 100644
--- a/python/cudf/cudf/_lib/rolling.pyx
+++ b/python/cudf/cudf/_lib/rolling.pyx
@@ -1,9 +1,5 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
-import pandas as pd
-
-import cudf
-
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx
index 9b422b77eeb..af63964bac3 100644
--- a/python/cudf/cudf/_lib/scalar.pyx
+++ b/python/cudf/cudf/_lib/scalar.pyx
@@ -69,12 +69,7 @@ from cudf._lib.cpp.wrappers.timestamps cimport (
     timestamp_s,
     timestamp_us,
 )
-from cudf._lib.utils cimport (
-    columns_from_table_view,
-    data_from_table_view,
-    table_view_from_columns,
-    table_view_from_table,
-)
+from cudf._lib.utils cimport columns_from_table_view, table_view_from_columns
 
 
 # The DeviceMemoryResource attribute could be released prematurely
diff --git a/python/cudf/cudf/_lib/sort.pyx b/python/cudf/cudf/_lib/sort.pyx
index 8074c7c5d3a..eb3aed80700 100644
--- a/python/cudf/cudf/_lib/sort.pyx
+++ b/python/cudf/cudf/_lib/sort.pyx
@@ -5,8 +5,6 @@ from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 from libcpp.vector cimport vector
 
-from enum import IntEnum
-
 from cudf._lib.column cimport Column
 from cudf._lib.cpp.aggregation cimport (
     rank_method,
@@ -20,10 +18,9 @@ from cudf._lib.cpp.sorting cimport (
     rank,
     sorted_order,
 )
-from cudf._lib.cpp.table.table cimport table
 from cudf._lib.cpp.table.table_view cimport table_view
 from cudf._lib.cpp.types cimport null_order, null_policy, order
-from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
+from cudf._lib.utils cimport table_view_from_columns
 
 
 def is_sorted(
@@ -198,8 +195,6 @@ def rank_columns(list source_columns, object method, str na_option,
     """
     Compute numerical data ranks (1 through n) of each column in the dataframe
     """
-    cdef table_view source_table_view = table_view_from_columns(source_columns)
-
     cdef rank_method c_rank_method = < rank_method > (
         < underlying_type_t_rank_method > method
     )
diff --git a/python/cudf/cudf/_lib/stream_compaction.pyx b/python/cudf/cudf/_lib/stream_compaction.pyx
index b645fcd59d0..38cead87e76 100644
--- a/python/cudf/cudf/_lib/stream_compaction.pyx
+++ b/python/cudf/cudf/_lib/stream_compaction.pyx
@@ -1,7 +1,5 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
-import pandas as pd
-
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
@@ -27,12 +25,7 @@ from cudf._lib.cpp.types cimport (
     order,
     size_type,
 )
-from cudf._lib.utils cimport (
-    columns_from_unique_ptr,
-    data_from_unique_ptr,
-    table_view_from_columns,
-    table_view_from_table,
-)
+from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
 
 
 def drop_nulls(list columns, how="any", keys=None, thresh=None):
diff --git a/python/cudf/cudf/_lib/string_casting.pyx b/python/cudf/cudf/_lib/string_casting.pyx
index f9e98efbbd9..3b29255e4b6 100644
--- a/python/cudf/cudf/_lib/string_casting.pyx
+++ b/python/cudf/cudf/_lib/string_casting.pyx
@@ -1,6 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
-
-import numpy as np
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from cudf._lib.column cimport Column
 
@@ -46,10 +44,6 @@ from cudf._lib.cpp.strings.convert.convert_ipv4 cimport (
     ipv4_to_integers as cpp_ipv4_to_integers,
     is_ipv4 as cpp_is_ipv4,
 )
-from cudf._lib.cpp.strings.convert.convert_urls cimport (
-    url_decode as cpp_url_decode,
-    url_encode as cpp_url_encode,
-)
 from cudf._lib.cpp.types cimport data_type, type_id
 from cudf._lib.types cimport underlying_type_t_type_id
 
diff --git a/python/cudf/cudf/_lib/strings/combine.pyx b/python/cudf/cudf/_lib/strings/combine.pyx
index eeb39f70728..141732b4c75 100644
--- a/python/cudf/cudf/_lib/strings/combine.pyx
+++ b/python/cudf/cudf/_lib/strings/combine.pyx
@@ -1,7 +1,6 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
-from libcpp.string cimport string
 from libcpp.utility cimport move
 
 from cudf._lib.column cimport Column
@@ -16,7 +15,6 @@ from cudf._lib.cpp.strings.combine cimport (
     separator_on_nulls as separator_on_nulls,
 )
 from cudf._lib.cpp.table.table_view cimport table_view
-from cudf._lib.cpp.types cimport size_type
 from cudf._lib.scalar cimport DeviceScalar
 from cudf._lib.utils cimport table_view_from_columns
 
diff --git a/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx b/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx
index dfc9cae915f..fc07cf6462a 100644
--- a/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx
+++ b/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx
@@ -1,17 +1,11 @@
 # Copyright (c) 2021-2022, NVIDIA CORPORATION.
 
-import numpy as np
-
 import cudf
 
-from cudf._lib.column cimport Column
-
-from cudf._lib.types import SUPPORTED_NUMPY_TO_LIBCUDF_TYPES
-
 from libcpp.memory cimport unique_ptr
-from libcpp.string cimport string
 from libcpp.utility cimport move
 
+from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.strings.convert.convert_fixed_point cimport (
@@ -19,14 +13,7 @@ from cudf._lib.cpp.strings.convert.convert_fixed_point cimport (
     is_fixed_point as cpp_is_fixed_point,
     to_fixed_point as cpp_to_fixed_point,
 )
-from cudf._lib.cpp.types cimport (
-    DECIMAL32,
-    DECIMAL64,
-    DECIMAL128,
-    data_type,
-    type_id,
-)
-from cudf._lib.types cimport underlying_type_t_type_id
+from cudf._lib.cpp.types cimport DECIMAL32, DECIMAL64, DECIMAL128, data_type
 
 
 def from_decimal(Column input_col):
diff --git a/python/cudf/cudf/_lib/strings/convert/convert_floats.pyx b/python/cudf/cudf/_lib/strings/convert/convert_floats.pyx
index d47b1e6e651..f9d028c5eb5 100644
--- a/python/cudf/cudf/_lib/strings/convert/convert_floats.pyx
+++ b/python/cudf/cudf/_lib/strings/convert/convert_floats.pyx
@@ -1,6 +1,5 @@
-# Copyright (c) 2021, NVIDIA CORPORATION.
+# Copyright (c) 2021-2022, NVIDIA CORPORATION.
 
-from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
diff --git a/python/cudf/cudf/_lib/strings/convert/convert_integers.pyx b/python/cudf/cudf/_lib/strings/convert/convert_integers.pyx
index 08bcca93086..220cbd0f760 100644
--- a/python/cudf/cudf/_lib/strings/convert/convert_integers.pyx
+++ b/python/cudf/cudf/_lib/strings/convert/convert_integers.pyx
@@ -1,6 +1,5 @@
-# Copyright (c) 2021, NVIDIA CORPORATION.
+# Copyright (c) 2021-2022, NVIDIA CORPORATION.
 
-from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
diff --git a/python/cudf/cudf/_lib/strings/extract.pyx b/python/cudf/cudf/_lib/strings/extract.pyx
index bac282dccc5..439c1546381 100644
--- a/python/cudf/cudf/_lib/strings/extract.pyx
+++ b/python/cudf/cudf/_lib/strings/extract.pyx
@@ -6,12 +6,10 @@ from libcpp.string cimport string
 from libcpp.utility cimport move
 
 from cudf._lib.column cimport Column
-from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.strings.extract cimport extract as cpp_extract
 from cudf._lib.cpp.strings.regex_flags cimport regex_flags
 from cudf._lib.cpp.table.table cimport table
-from cudf._lib.scalar cimport DeviceScalar
 from cudf._lib.utils cimport data_from_unique_ptr
 
 
diff --git a/python/cudf/cudf/_lib/strings/findall.pyx b/python/cudf/cudf/_lib/strings/findall.pyx
index bb4159ba309..be34ce1fb18 100644
--- a/python/cudf/cudf/_lib/strings/findall.pyx
+++ b/python/cudf/cudf/_lib/strings/findall.pyx
@@ -10,7 +10,6 @@ from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.strings.findall cimport findall as cpp_findall
 from cudf._lib.cpp.strings.regex_flags cimport regex_flags
-from cudf._lib.utils cimport data_from_unique_ptr
 
 
 def findall(Column source_strings, object pattern, uint32_t flags):
diff --git a/python/cudf/cudf/_lib/strings/json.pyx b/python/cudf/cudf/_lib/strings/json.pyx
index 1b1a9717e44..9dbc932d842 100644
--- a/python/cudf/cudf/_lib/strings/json.pyx
+++ b/python/cudf/cudf/_lib/strings/json.pyx
@@ -1,6 +1,5 @@
 # Copyright (c) 2021-2022, NVIDIA CORPORATION.
 
-from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
@@ -12,7 +11,6 @@ from cudf._lib.cpp.strings.json cimport (
     get_json_object as cpp_get_json_object,
     get_json_object_options,
 )
-from cudf._lib.cpp.types cimport size_type
 from cudf._lib.scalar cimport DeviceScalar
 
 
diff --git a/python/cudf/cudf/_lib/strings/padding.pyx b/python/cudf/cudf/_lib/strings/padding.pyx
index 9377870c1c1..99270b340eb 100644
--- a/python/cudf/cudf/_lib/strings/padding.pyx
+++ b/python/cudf/cudf/_lib/strings/padding.pyx
@@ -5,9 +5,7 @@ from libcpp.utility cimport move
 
 from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column_view cimport column_view
-from cudf._lib.cpp.scalar.scalar cimport string_scalar
 from cudf._lib.cpp.types cimport size_type
-from cudf._lib.scalar cimport DeviceScalar
 
 from enum import IntEnum
 
diff --git a/python/cudf/cudf/_lib/strings/replace.pyx b/python/cudf/cudf/_lib/strings/replace.pyx
index f5c47d2a2ed..72d66d9a8e3 100644
--- a/python/cudf/cudf/_lib/strings/replace.pyx
+++ b/python/cudf/cudf/_lib/strings/replace.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libc.stdint cimport int32_t
 from libcpp.memory cimport unique_ptr
@@ -12,7 +12,6 @@ from cudf._lib.cpp.strings.replace cimport (
     replace as cpp_replace,
     replace_slice as cpp_replace_slice,
 )
-from cudf._lib.cpp.strings.substring cimport slice_strings as cpp_slice_strings
 from cudf._lib.cpp.types cimport size_type
 from cudf._lib.scalar cimport DeviceScalar
 
diff --git a/python/cudf/cudf/_lib/strings/split/partition.pyx b/python/cudf/cudf/_lib/strings/split/partition.pyx
index b145b9ee52d..b17ea4e608d 100644
--- a/python/cudf/cudf/_lib/strings/split/partition.pyx
+++ b/python/cudf/cudf/_lib/strings/split/partition.pyx
@@ -1,11 +1,9 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
-from libcpp.string cimport string
 from libcpp.utility cimport move
 
 from cudf._lib.column cimport Column
-from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.scalar.scalar cimport string_scalar
 from cudf._lib.cpp.strings.split.partition cimport (
@@ -13,8 +11,6 @@ from cudf._lib.cpp.strings.split.partition cimport (
     rpartition as cpp_rpartition,
 )
 from cudf._lib.cpp.table.table cimport table
-from cudf._lib.cpp.table.table_view cimport table_view
-from cudf._lib.cpp.types cimport size_type
 from cudf._lib.scalar cimport DeviceScalar
 from cudf._lib.utils cimport data_from_unique_ptr
 
diff --git a/python/cudf/cudf/_lib/strings/split/split.pyx b/python/cudf/cudf/_lib/strings/split/split.pyx
index ce066aa6aec..e96c911e83a 100644
--- a/python/cudf/cudf/_lib/strings/split/split.pyx
+++ b/python/cudf/cudf/_lib/strings/split/split.pyx
@@ -19,7 +19,6 @@ from cudf._lib.cpp.strings.split.split cimport (
     split_record_re as cpp_split_record_re,
 )
 from cudf._lib.cpp.table.table cimport table
-from cudf._lib.cpp.table.table_view cimport table_view
 from cudf._lib.cpp.types cimport size_type
 from cudf._lib.scalar cimport DeviceScalar
 from cudf._lib.utils cimport data_from_unique_ptr
diff --git a/python/cudf/cudf/_lib/strings/strip.pyx b/python/cudf/cudf/_lib/strings/strip.pyx
index 93dfbcedb83..da3efe33786 100644
--- a/python/cudf/cudf/_lib/strings/strip.pyx
+++ b/python/cudf/cudf/_lib/strings/strip.pyx
@@ -1,7 +1,6 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
-from libcpp.string cimport string
 from libcpp.utility cimport move
 
 from cudf._lib.column cimport Column
@@ -10,7 +9,6 @@ from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.scalar.scalar cimport string_scalar
 from cudf._lib.cpp.strings.side_type cimport side_type
 from cudf._lib.cpp.strings.strip cimport strip as cpp_strip
-from cudf._lib.cpp.types cimport size_type
 from cudf._lib.scalar cimport DeviceScalar
 
 
diff --git a/python/cudf/cudf/_lib/text.pyx b/python/cudf/cudf/_lib/text.pyx
index be11132497e..a7346cdd586 100644
--- a/python/cudf/cudf/_lib/text.pyx
+++ b/python/cudf/cudf/_lib/text.pyx
@@ -2,11 +2,9 @@
 
 from io import TextIOBase
 
-import cudf
-
 from cython.operator cimport dereference
 from libc.stdint cimport uint64_t
-from libcpp.memory cimport make_unique, unique_ptr
+from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
 from libcpp.utility cimport move
 
@@ -43,7 +41,6 @@ def read_text(object filepaths_or_buffers,
 
     cdef size_t c_byte_range_offset
     cdef size_t c_byte_range_size
-    cdef byte_range_info c_byte_range
     cdef uint64_t c_compression_begin_offset
     cdef uint64_t c_compression_end_offset
     cdef parse_options c_options
diff --git a/python/cudf/cudf/_lib/transform.pyx b/python/cudf/cudf/_lib/transform.pyx
index 5fa45f68357..e1612855dae 100644
--- a/python/cudf/cudf/_lib/transform.pyx
+++ b/python/cudf/cudf/_lib/transform.pyx
@@ -1,6 +1,5 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
-import numpy as np
 from numba.np import numpy_support
 
 import cudf
@@ -31,9 +30,7 @@ from cudf._lib.types cimport underlying_type_t_type_id
 from cudf._lib.utils cimport (
     columns_from_unique_ptr,
     data_from_table_view,
-    data_from_unique_ptr,
     table_view_from_columns,
-    table_view_from_table,
 )
 
 
@@ -45,7 +42,6 @@ def bools_to_mask(Column col):
     cdef column_view col_view = col.view()
     cdef pair[unique_ptr[device_buffer], size_type] cpp_out
     cdef unique_ptr[device_buffer] up_db
-    cdef size_type null_count
 
     with nogil:
         cpp_out = move(libcudf_transform.bools_to_mask(col_view))
diff --git a/python/cudf/cudf/_lib/types.pyx b/python/cudf/cudf/_lib/types.pyx
index aa70ab99ebd..c306d398123 100644
--- a/python/cudf/cudf/_lib/types.pyx
+++ b/python/cudf/cudf/_lib/types.pyx
@@ -11,7 +11,6 @@ from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.lists.lists_column_view cimport lists_column_view
 from cudf._lib.types cimport (
     underlying_type_t_interpolation,
-    underlying_type_t_null_order,
     underlying_type_t_order,
     underlying_type_t_sorted,
 )
diff --git a/python/cudf/cudf/_lib/unary.pyx b/python/cudf/cudf/_lib/unary.pyx
index acca61cf9d1..52f0a804b2a 100644
--- a/python/cudf/cudf/_lib/unary.pyx
+++ b/python/cudf/cudf/_lib/unary.pyx
@@ -1,32 +1,21 @@
-# Copyright (c) 2020-2021, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from enum import IntEnum
 
 from cudf.api.types import is_decimal_dtype
 
-from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
 import numpy as np
 
+cimport cudf._lib.cpp.unary as libcudf_unary
 from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column cimport column
-from cudf._lib.cpp.column.column_view cimport column_view, mutable_column_view
-
-from cudf._lib.types import SUPPORTED_NUMPY_TO_LIBCUDF_TYPES
-
-from cudf._lib.cpp.types cimport data_type, size_type, type_id
-
-from cudf._lib.column import (
-    LIBCUDF_TO_SUPPORTED_NUMPY_TYPES,
-    SUPPORTED_NUMPY_TO_LIBCUDF_TYPES,
-)
-
-cimport cudf._lib.cpp.types as libcudf_types
-cimport cudf._lib.cpp.unary as libcudf_unary
+from cudf._lib.cpp.column.column_view cimport column_view
+from cudf._lib.cpp.types cimport data_type
 from cudf._lib.cpp.unary cimport unary_operator, underlying_type_t_unary_op
-from cudf._lib.types cimport dtype_to_data_type, underlying_type_t_type_id
+from cudf._lib.types cimport dtype_to_data_type
 
 
 class UnaryOp(IntEnum):
diff --git a/python/cudf/cudf/_lib/utils.pyx b/python/cudf/cudf/_lib/utils.pyx
index e0bdc7d8f74..989d12caca0 100644
--- a/python/cudf/cudf/_lib/utils.pyx
+++ b/python/cudf/cudf/_lib/utils.pyx
@@ -6,7 +6,6 @@ import pyarrow as pa
 import cudf
 
 from cython.operator cimport dereference
-from libc.stdint cimport uint8_t
 from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
 from libcpp.utility cimport move
diff --git a/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx b/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx
index bb1892a4d26..7a0cdeb10b9 100644
--- a/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx
+++ b/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx
@@ -6,7 +6,7 @@ from libcpp.utility cimport move
 from cudf.core.buffer import Buffer
 
 from cudf._lib.column cimport Column
-from cudf._lib.cpp.column.column cimport column, column_view
+from cudf._lib.cpp.column.column cimport column_view
 from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer
 
 from strings_udf._lib.cpp.strings_udf cimport (

From 1c057bcc87b9fa8a9e8b39eec60375d1346b9ff5 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Mon, 31 Oct 2022 06:12:18 -0700
Subject: [PATCH 097/202] Use pragma once (#12019)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This PR adapts a few files using header guards with `#ifndef… #define` to use `#pragma once` instead. This establishes a more consistent code style for the library.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - David Wendt (https://github.com/davidwendt)

URL: https://github.com/rapidsai/cudf/pull/12019
---
 cpp/benchmarks/synchronization/synchronization.hpp | 5 +----
 cpp/src/hash/hash_allocator.cuh                    | 5 +----
 cpp/src/hash/helper_functions.cuh                  | 7 ++-----
 cpp/src/hash/managed.cuh                           | 7 ++-----
 cpp/src/unary/unary_ops.cuh                        | 5 +----
 5 files changed, 7 insertions(+), 22 deletions(-)

diff --git a/cpp/benchmarks/synchronization/synchronization.hpp b/cpp/benchmarks/synchronization/synchronization.hpp
index ebff1ff888d..e56d881d459 100644
--- a/cpp/benchmarks/synchronization/synchronization.hpp
+++ b/cpp/benchmarks/synchronization/synchronization.hpp
@@ -58,8 +58,7 @@
 
  */
 
-#ifndef CUDF_BENCH_SYNCHRONIZATION_H
-#define CUDF_BENCH_SYNCHRONIZATION_H
+#pragma once
 
 // Google Benchmark library
 #include <benchmark/benchmark.h>
@@ -102,5 +101,3 @@ class cuda_event_timer {
   rmm::cuda_stream_view stream;
   benchmark::State* p_state;
 };
-
-#endif
diff --git a/cpp/src/hash/hash_allocator.cuh b/cpp/src/hash/hash_allocator.cuh
index 709b72d4fd2..207f46ae543 100644
--- a/cpp/src/hash/hash_allocator.cuh
+++ b/cpp/src/hash/hash_allocator.cuh
@@ -14,8 +14,7 @@
  * limitations under the License.
  */
 
-#ifndef HASH_ALLOCATOR_CUH
-#define HASH_ALLOCATOR_CUH
+#pragma once
 
 #include <new>
 
@@ -61,5 +60,3 @@ bool operator!=(const default_allocator<T>&, const default_allocator<U>&)
 {
   return false;
 }
-
-#endif
diff --git a/cpp/src/hash/helper_functions.cuh b/cpp/src/hash/helper_functions.cuh
index 3b8d8528894..70fc47538c9 100644
--- a/cpp/src/hash/helper_functions.cuh
+++ b/cpp/src/hash/helper_functions.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020, NVIDIA CORPORATION.
+ * Copyright (c) 2017-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,8 +14,7 @@
  * limitations under the License.
  */
 
-#ifndef HELPER_FUNCTIONS_CUH
-#define HELPER_FUNCTIONS_CUH
+#pragma once
 
 #include <cudf/types.hpp>
 
@@ -242,5 +241,3 @@ __host__ __device__ bool operator!=(const cycle_iterator_adapter<T>& lhs,
 {
   return !lhs.equal(rhs);
 }
-
-#endif  // HELPER_FUNCTIONS_CUH
diff --git a/cpp/src/hash/managed.cuh b/cpp/src/hash/managed.cuh
index c5aab78589e..d85a12c69a9 100644
--- a/cpp/src/hash/managed.cuh
+++ b/cpp/src/hash/managed.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017, NVIDIA CORPORATION.
+ * Copyright (c) 2017-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,8 +14,7 @@
  * limitations under the License.
  */
 
-#ifndef MANAGED_CUH
-#define MANAGED_CUH
+#pragma once
 
 #include <new>
 
@@ -43,5 +42,3 @@ inline bool isPtrManaged(cudaPointerAttributes attr)
   return attr.isManaged;
 #endif
 }
-
-#endif  // MANAGED_CUH
diff --git a/cpp/src/unary/unary_ops.cuh b/cpp/src/unary/unary_ops.cuh
index 08b68cc0591..d0003bb6b41 100644
--- a/cpp/src/unary/unary_ops.cuh
+++ b/cpp/src/unary/unary_ops.cuh
@@ -14,8 +14,7 @@
  * limitations under the License.
  */
 
-#ifndef UNARY_OPS_H
-#define UNARY_OPS_H
+#pragma once
 
 #include <cudf/copying.hpp>
 #include <cudf/detail/copy.hpp>
@@ -78,5 +77,3 @@ struct launcher {
 
 }  // namespace unary
 }  // namespace cudf
-
-#endif  // UNARY_OPS_H

From f0b4c4fbb4b8b3f6b648cdf5fd105d69b3ab0cae Mon Sep 17 00:00:00 2001
From: Vukasin Milovanovic <vmilovanovic@nvidia.com>
Date: Mon, 31 Oct 2022 16:53:46 -0700
Subject: [PATCH 098/202] Pass column names to `write_csv` instead of
 `table_metadata` pointer (#11972)

Contributes to https://github.com/rapidsai/cudf/issues/6411
`write_csv` takes a pointer to `table_metadata` but only uses the column names.
This PR changes the API to directly take column names. This also aligns with `read_csv`.

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Matthew Roeschke (https://github.com/mroeschke)
  - Nghia Truong (https://github.com/ttnghia)
  - Yunsong Wang (https://github.com/PointKernel)

URL: https://github.com/rapidsai/cudf/pull/11972
---
 cpp/include/cudf/io/csv.hpp          | 24 +++++------
 cpp/include/cudf/io/detail/csv.hpp   |  4 +-
 cpp/src/io/csv/writer_impl.cu        | 15 +++----
 cpp/src/io/functions.cpp             |  2 +-
 cpp/tests/io/csv_test.cpp            | 64 +++++++++++-----------------
 python/cudf/cudf/_lib/cpp/io/csv.pxd |  8 ++--
 python/cudf/cudf/_lib/csv.pyx        | 15 +++----
 7 files changed, 57 insertions(+), 75 deletions(-)

diff --git a/cpp/include/cudf/io/csv.hpp b/cpp/include/cudf/io/csv.hpp
index f753028a148..1fc4114b94c 100644
--- a/cpp/include/cudf/io/csv.hpp
+++ b/cpp/include/cudf/io/csv.hpp
@@ -1338,8 +1338,8 @@ class csv_writer_options {
   std::string _true_value = std::string{"true"};
   // string to use for values == 0 in INT8 types (default 'false')
   std::string _false_value = std::string{"false"};
-  // Optional associated metadata
-  table_metadata const* _metadata = nullptr;
+  // Names of all columns; if empty, writer will generate column names
+  std::vector<std::string> _names;
 
   /**
    * @brief Constructor from sink and table.
@@ -1387,11 +1387,11 @@ class csv_writer_options {
   [[nodiscard]] table_view const& get_table() const { return _table; }
 
   /**
-   * @brief Returns optional associated metadata.
+   * @brief Returns names of the columns.
    *
-   * @return Optional associated metadata
+   * @return Names of the columns in the output file
    */
-  [[nodiscard]] table_metadata const* get_metadata() const { return _metadata; }
+  [[nodiscard]] std::vector<std::string> const& get_names() const { return _names; }
 
   /**
    * @brief Returns string to used for null entries.
@@ -1444,11 +1444,11 @@ class csv_writer_options {
 
   // Setter
   /**
-   * @brief Sets optional associated metadata.
+   * @brief Sets optional associated column names.
    *
-   @param metadata Associated metadata
+   * @param names Associated column names
    */
-  void set_metadata(table_metadata* metadata) { _metadata = metadata; }
+  void set_names(std::vector<std::string> names) { _names = std::move(names); }
 
   /**
    * @brief Sets string to used for null entries.
@@ -1526,14 +1526,14 @@ class csv_writer_options_builder {
   }
 
   /**
-   * @brief Sets optional associated metadata.
+   * @brief Sets optional column names.
    *
-   * @param metadata Associated metadata
+   * @param names Column names
    * @return this for chaining
    */
-  csv_writer_options_builder& metadata(table_metadata* metadata)
+  csv_writer_options_builder& names(std::vector<std::string> names)
   {
-    options._metadata = metadata;
+    options._names = names;
     return *this;
   }
 
diff --git a/cpp/include/cudf/io/detail/csv.hpp b/cpp/include/cudf/io/detail/csv.hpp
index 59de2ea2f12..90d730338fc 100644
--- a/cpp/include/cudf/io/detail/csv.hpp
+++ b/cpp/include/cudf/io/detail/csv.hpp
@@ -46,14 +46,14 @@ table_with_metadata read_csv(std::unique_ptr<cudf::io::datasource>&& source,
  *
  * @param sink Output sink
  * @param table The set of columns
- * @param metadata The metadata associated with the table
+ * @param column_names Column names for the output CSV
  * @param options Settings for controlling behavior
  * @param stream CUDA stream used for device memory operations and kernel launches.
  * @param mr Device memory resource to use for device memory allocation
  */
 void write_csv(data_sink* sink,
                table_view const& table,
-               const table_metadata* metadata,
+               host_span<std::string const> column_names,
                csv_writer_options const& options,
                rmm::cuda_stream_view stream,
                rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
diff --git a/cpp/src/io/csv/writer_impl.cu b/cpp/src/io/csv/writer_impl.cu
index 2fae7b4c75a..7230b455d4a 100644
--- a/cpp/src/io/csv/writer_impl.cu
+++ b/cpp/src/io/csv/writer_impl.cu
@@ -279,21 +279,21 @@ struct column_to_strings_fn {
 //
 void write_chunked_begin(data_sink* out_sink,
                          table_view const& table,
-                         table_metadata const* metadata,
+                         host_span<std::string const> user_column_names,
                          csv_writer_options const& options,
                          rmm::cuda_stream_view stream,
                          rmm::mr::device_memory_resource* mr)
 {
   if (options.is_enabled_include_header()) {
-    // need to generate column names if metadata is not provided
+    // need to generate column names if names are not provided
     std::vector<std::string> generated_col_names;
-    if (metadata == nullptr) {
+    if (user_column_names.empty()) {
       generated_col_names.resize(table.num_columns());
       thrust::tabulate(generated_col_names.begin(), generated_col_names.end(), [](auto idx) {
         return std::to_string(idx);
       });
     }
-    auto const& column_names = (metadata == nullptr) ? generated_col_names : metadata->column_names;
+    auto const& column_names = user_column_names.empty() ? generated_col_names : user_column_names;
     CUDF_EXPECTS(column_names.size() == static_cast<size_t>(table.num_columns()),
                  "Mismatch between number of column headers and table columns.");
 
@@ -346,7 +346,6 @@ void write_chunked_begin(data_sink* out_sink,
 
 void write_chunked(data_sink* out_sink,
                    strings_column_view const& str_column_view,
-                   table_metadata const* metadata,
                    csv_writer_options const& options,
                    rmm::cuda_stream_view stream,
                    rmm::mr::device_memory_resource* mr)
@@ -399,7 +398,7 @@ void write_chunked(data_sink* out_sink,
 
 void write_csv(data_sink* out_sink,
                table_view const& table,
-               table_metadata const* metadata,
+               host_span<std::string const> user_column_names,
                csv_writer_options const& options,
                rmm::cuda_stream_view stream,
                rmm::mr::device_memory_resource* mr)
@@ -407,7 +406,7 @@ void write_csv(data_sink* out_sink,
   // write header: column names separated by delimiter:
   // (even for tables with no rows)
   //
-  write_chunked_begin(out_sink, table, metadata, options, stream, mr);
+  write_chunked_begin(out_sink, table, user_column_names, options, stream, mr);
 
   if (table.num_rows() > 0) {
     // no need to check same-size columns constraint; auto-enforced by table_view
@@ -476,7 +475,7 @@ void write_csv(data_sink* out_sink,
         return cudf::strings::detail::replace_nulls(str_table_view.column(0), narep, stream);
       }();
 
-      write_chunked(out_sink, str_concat_col->view(), metadata, options, stream, mr);
+      write_chunked(out_sink, str_concat_col->view(), options, stream, mr);
     }
   }
 }
diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp
index f58a34a5b7b..94a191147c2 100644
--- a/cpp/src/io/functions.cpp
+++ b/cpp/src/io/functions.cpp
@@ -231,7 +231,7 @@ void write_csv(csv_writer_options const& options, rmm::mr::device_memory_resourc
   return csv::write_csv(  //
     sinks[0].get(),
     options.get_table(),
-    options.get_metadata(),
+    options.get_names(),
     options,
     cudf::get_default_stream(),
     mr);
diff --git a/cpp/tests/io/csv_test.cpp b/cpp/tests/io/csv_test.cpp
index f532836ef95..8100c8e3d7f 100644
--- a/cpp/tests/io/csv_test.cpp
+++ b/cpp/tests/io/csv_test.cpp
@@ -277,30 +277,12 @@ void expect_column_data_equal(std::vector<T> const& lhs, cudf::column_view const
 
 void write_csv_helper(std::string const& filename,
                       cudf::table_view const& table,
-                      bool include_header,
                       std::vector<std::string> const& names = {})
 {
-  // csv_writer_options only keeps a pointer to metadata (non-owning)
-  cudf::io::table_metadata metadata{};
-
-  if (not names.empty()) {
-    metadata.column_names = names;
-  } else {
-    // generate some dummy column names
-    int i                  = 0;
-    auto const num_columns = table.num_columns();
-    metadata.column_names.reserve(num_columns);
-    std::generate_n(std::back_inserter(metadata.column_names), num_columns, [&i]() {
-      return std::string("col") + std::to_string(i++);
-    });
-  }
-
   cudf::io::csv_writer_options writer_options =
     cudf::io::csv_writer_options::builder(cudf::io::sink_info(filename), table)
-      .include_header(include_header)
-      .rows_per_chunk(
-        1)  // Note: this gets adjusted to multiple of 8 (per legacy code logic and requirements)
-      .metadata(&metadata);
+      .include_header(not names.empty())
+      .names(names);
 
   cudf::io::write_csv(writer_options);
 }
@@ -1509,7 +1491,7 @@ TYPED_TEST(CsvReaderNumericTypeTest, SingleColumnWithWriter)
 
   auto filepath = temp_env->get_temp_filepath("SingleColumnWithWriter.csv");
 
-  write_csv_helper(filepath, input_table, false);
+  write_csv_helper(filepath, input_table);
 
   cudf::io::csv_reader_options in_opts =
     cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}).header(-1);
@@ -1577,7 +1559,7 @@ TEST_F(CsvReaderTest, MultiColumnWithWriter)
 
   auto filepath = temp_env->get_temp_dir() + "MultiColumnWithWriter.csv";
 
-  write_csv_helper(filepath, input_table, false);
+  write_csv_helper(filepath, input_table);
 
   cudf::io::csv_reader_options in_opts =
     cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
@@ -1625,7 +1607,7 @@ TEST_F(CsvReaderTest, DatesWithWriter)
   cudf::table_view input_table(std::vector<cudf::column_view>{input_column});
 
   // TODO need to add a dayfirst flag?
-  write_csv_helper(filepath, input_table, false);
+  write_csv_helper(filepath, input_table);
 
   cudf::io::csv_reader_options in_opts =
     cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
@@ -1650,7 +1632,7 @@ TEST_F(CsvReaderTest, DatesStringWithWriter)
 
     cudf::table_view input_table(std::vector<cudf::column_view>{input_column});
 
-    write_csv_helper(filepath, input_table, false);
+    write_csv_helper(filepath, input_table);
 
     cudf::io::csv_reader_options in_opts =
       cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
@@ -1673,7 +1655,7 @@ TEST_F(CsvReaderTest, DatesStringWithWriter)
 
     cudf::table_view input_table(std::vector<cudf::column_view>{input_column});
 
-    write_csv_helper(filepath, input_table, false);
+    write_csv_helper(filepath, input_table);
 
     cudf::io::csv_reader_options in_opts =
       cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
@@ -1696,7 +1678,7 @@ TEST_F(CsvReaderTest, DatesStringWithWriter)
 
     cudf::table_view input_table(std::vector<cudf::column_view>{input_column});
 
-    write_csv_helper(filepath, input_table, false);
+    write_csv_helper(filepath, input_table);
 
     cudf::io::csv_reader_options in_opts =
       cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
@@ -1720,7 +1702,7 @@ TEST_F(CsvReaderTest, DatesStringWithWriter)
 
     cudf::table_view input_table(std::vector<cudf::column_view>{input_column});
 
-    write_csv_helper(filepath, input_table, false);
+    write_csv_helper(filepath, input_table);
 
     cudf::io::csv_reader_options in_opts =
       cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
@@ -1743,7 +1725,7 @@ TEST_F(CsvReaderTest, DatesStringWithWriter)
 
     cudf::table_view input_table(std::vector<cudf::column_view>{input_column});
 
-    write_csv_helper(filepath, input_table, false);
+    write_csv_helper(filepath, input_table);
 
     cudf::io::csv_reader_options in_opts =
       cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
@@ -1766,7 +1748,7 @@ TEST_F(CsvReaderTest, FloatingPointWithWriter)
   cudf::table_view input_table(std::vector<cudf::column_view>{input_column});
 
   // TODO add lineterminator=";"
-  write_csv_helper(filepath, input_table, false);
+  write_csv_helper(filepath, input_table);
 
   cudf::io::csv_reader_options in_opts =
     cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
@@ -1792,11 +1774,10 @@ TEST_F(CsvReaderTest, StringsWithWriter)
   cudf::table_view input_table(std::vector<cudf::column_view>{int_column, string_column});
 
   // TODO add quoting style flag?
-  write_csv_helper(filepath, input_table, true, names);
+  write_csv_helper(filepath, input_table, names);
 
   cudf::io::csv_reader_options in_opts =
     cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
-      .names(names)
       .dtypes(std::vector<data_type>{dtype<int32_t>(), dtype<cudf::string_view>()})
       .quoting(cudf::io::quote_style::NONE);
   auto result = cudf::io::read_csv(in_opts);
@@ -1804,6 +1785,7 @@ TEST_F(CsvReaderTest, StringsWithWriter)
   const auto result_table = result.tbl->view();
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(input_table.column(0), result_table.column(0));
   check_string_column(input_table.column(1), result_table.column(1));
+  ASSERT_EQ(names, result.metadata.column_names);
 }
 
 TEST_F(CsvReaderTest, StringsWithWriterSimple)
@@ -1817,11 +1799,10 @@ TEST_F(CsvReaderTest, StringsWithWriterSimple)
   cudf::table_view input_table(std::vector<cudf::column_view>{int_column, string_column});
 
   // TODO add quoting style flag?
-  write_csv_helper(filepath, input_table, true, names);
+  write_csv_helper(filepath, input_table, names);
 
   cudf::io::csv_reader_options in_opts =
     cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
-      .names(names)
       .dtypes(std::vector<data_type>{dtype<int32_t>(), dtype<cudf::string_view>()})
       .quoting(cudf::io::quote_style::NONE);
   auto result = cudf::io::read_csv(in_opts);
@@ -1829,6 +1810,7 @@ TEST_F(CsvReaderTest, StringsWithWriterSimple)
   const auto result_table = result.tbl->view();
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(input_table.column(0), result_table.column(0));
   check_string_column(input_table.column(1), result_table.column(1));
+  ASSERT_EQ(names, result.metadata.column_names);
 }
 
 TEST_F(CsvReaderTest, StringsEmbeddedDelimiter)
@@ -1841,15 +1823,15 @@ TEST_F(CsvReaderTest, StringsEmbeddedDelimiter)
   auto string_column = column_wrapper<cudf::string_view>{"abc def ghi", "jkl,mno,pq", "stu vwx y"};
   cudf::table_view input_table(std::vector<cudf::column_view>{int_column, string_column});
 
-  write_csv_helper(filepath, input_table, true, names);
+  write_csv_helper(filepath, input_table, names);
 
   cudf::io::csv_reader_options in_opts =
     cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
-      .names(names)
       .dtypes(std::vector<data_type>{dtype<int32_t>(), dtype<cudf::string_view>()});
   auto result = cudf::io::read_csv(in_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUIVALENT(input_table, result.tbl->view());
+  ASSERT_EQ(names, result.metadata.column_names);
 }
 
 TEST_F(CsvReaderTest, HeaderEmbeddedDelimiter)
@@ -1864,7 +1846,7 @@ TEST_F(CsvReaderTest, HeaderEmbeddedDelimiter)
   cudf::table_view input_table(
     std::vector<cudf::column_view>{int_column, string_column, int_column, int_column, int_column});
 
-  write_csv_helper(filepath, input_table, true, names);
+  write_csv_helper(filepath, input_table, names);
 
   cudf::io::csv_reader_options in_opts =
     cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
@@ -1877,6 +1859,7 @@ TEST_F(CsvReaderTest, HeaderEmbeddedDelimiter)
   auto result = cudf::io::read_csv(in_opts);
 
   CUDF_TEST_EXPECT_TABLES_EQUIVALENT(input_table, result.tbl->view());
+  ASSERT_EQ(names, result.metadata.column_names);
 }
 
 TEST_F(CsvReaderTest, EmptyFileWithWriter)
@@ -1884,7 +1867,7 @@ TEST_F(CsvReaderTest, EmptyFileWithWriter)
   auto filepath = temp_env->get_temp_dir() + "EmptyFileWithWriter.csv";
 
   cudf::table_view empty_table;
-  write_csv_helper(filepath, empty_table, false);
+  write_csv_helper(filepath, empty_table);
   cudf::io::csv_reader_options in_opts =
     cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath});
   auto result = cudf::io::read_csv(in_opts);
@@ -1968,7 +1951,7 @@ TEST_F(CsvReaderTest, DurationsWithWriter)
     durations_D, durations_s, durations_ms, durations_us, durations_ns});
   std::vector<std::string> names{"D", "s", "ms", "us", "ns"};
 
-  write_csv_helper(filepath, input_table, true, names);
+  write_csv_helper(filepath, input_table, names);
 
   cudf::io::csv_reader_options in_opts =
     cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath})
@@ -1982,6 +1965,7 @@ TEST_F(CsvReaderTest, DurationsWithWriter)
 
   const auto result_table = result.tbl->view();
   CUDF_TEST_EXPECT_TABLES_EQUIVALENT(input_table, result_table);
+  ASSERT_EQ(names, result.metadata.column_names);
 }
 
 TEST_F(CsvReaderTest, ParseInRangeIntegers)
@@ -2044,7 +2028,7 @@ TEST_F(CsvReaderTest, ParseInRangeIntegers)
 
   auto filepath = temp_env->get_temp_filepath("ParseInRangeIntegers.csv");
 
-  write_csv_helper(filepath, input_table, false);
+  write_csv_helper(filepath, input_table);
 
   cudf::io::csv_reader_options in_opts =
     cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}).header(-1);
@@ -2123,7 +2107,7 @@ TEST_F(CsvReaderTest, ParseOutOfRangeIntegers)
 
   auto filepath = temp_env->get_temp_filepath("ParseOutOfRangeIntegers.csv");
 
-  write_csv_helper(filepath, input_table, false);
+  write_csv_helper(filepath, input_table);
 
   cudf::io::csv_reader_options in_opts =
     cudf::io::csv_reader_options::builder(cudf::io::source_info{filepath}).header(-1);
diff --git a/python/cudf/cudf/_lib/cpp/io/csv.pxd b/python/cudf/cudf/_lib/cpp/io/csv.pxd
index e8064557592..e7c0fec2e3d 100644
--- a/python/cudf/cudf/_lib/cpp/io/csv.pxd
+++ b/python/cudf/cudf/_lib/cpp/io/csv.pxd
@@ -199,6 +199,7 @@ cdef extern from "cudf/io/csv.hpp" \
         char get_inter_column_delimiter() except +
         string get_true_value() except +
         string get_false_value() except +
+        vector[string] get_names() except +
 
         # setter
         void set_metadata(cudf_io_types.table_metadata* val) except +
@@ -207,8 +208,9 @@ cdef extern from "cudf/io/csv.hpp" \
         void set_rows_per_chunk(size_type val) except +
         void set_line_terminator(string term) except +
         void set_inter_column_delimiter(char delim) except +
-        void set__true_value(string val) except +
+        void set_true_value(string val) except +
         void set_false_value(string val) except +
+        void set_names(vector[string] val) except +
 
         @staticmethod
         csv_writer_options_builder builder(
@@ -223,9 +225,7 @@ cdef extern from "cudf/io/csv.hpp" \
             cudf_table_view.table_view table
         ) except +
 
-        csv_writer_options_builder& metadata(
-            cudf_io_types.table_metadata* val
-        ) except +
+        csv_writer_options_builder& names(vector[string] val) except +
         csv_writer_options_builder& na_rep(string val) except +
         csv_writer_options_builder& include_header(bool val) except +
         csv_writer_options_builder& rows_per_chunk(size_type val) except +
diff --git a/python/cudf/cudf/_lib/csv.pyx b/python/cudf/cudf/_lib/csv.pyx
index 920d423adc5..86808b8ba00 100644
--- a/python/cudf/cudf/_lib/csv.pyx
+++ b/python/cudf/cudf/_lib/csv.pyx
@@ -40,7 +40,6 @@ from cudf._lib.cpp.io.types cimport (
     quote_style,
     sink_info,
     source_info,
-    table_metadata,
     table_with_metadata,
 )
 from cudf._lib.cpp.table.table_view cimport table_view
@@ -469,7 +468,7 @@ cpdef write_csv(
     cdef string line_term_c = line_terminator.encode()
     cdef string na_c = na_rep.encode()
     cdef int rows_per_chunk_c = rows_per_chunk
-    cdef table_metadata metadata_ = table_metadata()
+    cdef vector[string] col_names
     cdef string true_value_c = 'True'.encode()
     cdef string false_value_c = 'False'.encode()
     cdef unique_ptr[data_sink] data_sink_c
@@ -481,26 +480,26 @@ cpdef write_csv(
             all_names = table._index.names + all_names
 
         if len(all_names) > 0:
-            metadata_.column_names.reserve(len(all_names))
+            col_names.reserve(len(all_names))
             if len(all_names) == 1:
                 if all_names[0] in (None, ''):
-                    metadata_.column_names.push_back('""'.encode())
+                    col_names.push_back('""'.encode())
                 else:
-                    metadata_.column_names.push_back(
+                    col_names.push_back(
                         str(all_names[0]).encode()
                     )
             else:
                 for idx, col_name in enumerate(all_names):
                     if col_name is None:
-                        metadata_.column_names.push_back(''.encode())
+                        col_names.push_back(''.encode())
                     else:
-                        metadata_.column_names.push_back(
+                        col_names.push_back(
                             str(col_name).encode()
                         )
 
     cdef csv_writer_options options = move(
         csv_writer_options.builder(sink_info_c, input_table_view)
-        .metadata(&metadata_)
+        .names(col_names)
         .na_rep(na_c)
         .include_header(include_header_c)
         .rows_per_chunk(rows_per_chunk_c)

From a5aaa52a06b83e01357169a5110cbdae65888e6d Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Tue, 1 Nov 2022 09:15:59 -0400
Subject: [PATCH 099/202] Remove default parameters for
 cudf::dictionary::detail functions (#12006)

Removes default parameters from the `cudf::dictionary::detail` functions. Most of these defaults only supplied the current device memory resource for the `mr` parameter, which is unnecessary. One non-stream, non-mr parameter was also defaulted, but the default was never used.

Reference #11967

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Tobias Ribizel (https://github.com/upsj)
  - Yunsong Wang (https://github.com/PointKernel)
  - Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/12006
---
 cpp/include/cudf/detail/scatter.cuh           |  3 +-
 .../cudf/dictionary/detail/concatenate.hpp    |  7 ++--
 cpp/include/cudf/dictionary/detail/encode.hpp | 16 ++++----
 .../cudf/dictionary/detail/replace.hpp        | 18 ++++-----
 cpp/include/cudf/dictionary/detail/search.hpp | 18 ++++-----
 .../cudf/dictionary/detail/update_keys.hpp    | 38 +++++++++----------
 cpp/src/copying/copy_range.cu                 |  3 +-
 cpp/src/copying/scatter.cu                    |  5 ++-
 cpp/src/dictionary/add_keys.cu                |  9 ++---
 cpp/src/dictionary/remove_keys.cu             | 25 ++++++------
 cpp/src/dictionary/replace.cu                 |  7 ++--
 cpp/src/dictionary/set_keys.cu                |  9 ++---
 cpp/src/filling/fill.cu                       |  3 +-
 cpp/src/replace/clamp.cu                      | 11 ++++--
 cpp/src/replace/replace.cu                    |  7 ++--
 cpp/src/search/contains_column.cu             |  8 ++--
 cpp/src/search/contains_scalar.cu             |  3 +-
 cpp/src/search/search_ordered.cu              |  3 +-
 cpp/tests/dictionary/search_test.cpp          | 18 ++++++---
 cpp/tests/replace/replace_nulls_tests.cpp     | 18 +++------
 20 files changed, 112 insertions(+), 117 deletions(-)

diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh
index 88babe2f397..ad5a2134afe 100644
--- a/cpp/include/cudf/detail/scatter.cuh
+++ b/cpp/include/cudf/detail/scatter.cuh
@@ -218,7 +218,8 @@ struct column_scatterer_impl<dictionary32> {
     // first combine keys so both dictionaries have the same set
     auto target_matched    = dictionary::detail::add_keys(target, source.keys(), stream, mr);
     auto const target_view = dictionary_column_view(target_matched->view());
-    auto source_matched    = dictionary::detail::set_keys(source, target_view.keys(), stream);
+    auto source_matched    = dictionary::detail::set_keys(
+      source, target_view.keys(), stream, rmm::mr::get_current_device_resource());
     auto const source_view = dictionary_column_view(source_matched->view());
 
     // now build the new indices by doing a scatter on just the matched indices
diff --git a/cpp/include/cudf/dictionary/detail/concatenate.hpp b/cpp/include/cudf/dictionary/detail/concatenate.hpp
index 716caa3e304..d74429484ce 100644
--- a/cpp/include/cudf/dictionary/detail/concatenate.hpp
+++ b/cpp/include/cudf/dictionary/detail/concatenate.hpp
@@ -37,10 +37,9 @@ namespace detail {
  * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New column with concatenated results.
  */
-std::unique_ptr<column> concatenate(
-  host_span<column_view const> columns,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> concatenate(host_span<column_view const> columns,
+                                    rmm::cuda_stream_view stream,
+                                    rmm::mr::device_memory_resource* mr);
 
 }  // namespace detail
 }  // namespace dictionary
diff --git a/cpp/include/cudf/dictionary/detail/encode.hpp b/cpp/include/cudf/dictionary/detail/encode.hpp
index a16d518dd0d..2aad7dd80ed 100644
--- a/cpp/include/cudf/dictionary/detail/encode.hpp
+++ b/cpp/include/cudf/dictionary/detail/encode.hpp
@@ -51,11 +51,10 @@ namespace detail {
  * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return Returns a dictionary column.
  */
-std::unique_ptr<column> encode(
-  column_view const& column,
-  data_type indices_type              = data_type{type_id::UINT32},
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> encode(column_view const& column,
+                               data_type indices_type,
+                               rmm::cuda_stream_view stream,
+                               rmm::mr::device_memory_resource* mr);
 
 /**
  * @brief Create a column by gathering the keys from the provided
@@ -72,10 +71,9 @@ std::unique_ptr<column> encode(
  * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New column with type matching the dictionary_column's keys.
  */
-std::unique_ptr<column> decode(
-  dictionary_column_view const& dictionary_column,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> decode(dictionary_column_view const& dictionary_column,
+                               rmm::cuda_stream_view stream,
+                               rmm::mr::device_memory_resource* mr);
 
 /**
  * @brief Return minimal integer type for the given number of elements.
diff --git a/cpp/include/cudf/dictionary/detail/replace.hpp b/cpp/include/cudf/dictionary/detail/replace.hpp
index 85e2d9a3a85..0778baa84d6 100644
--- a/cpp/include/cudf/dictionary/detail/replace.hpp
+++ b/cpp/include/cudf/dictionary/detail/replace.hpp
@@ -39,11 +39,10 @@ namespace detail {
  * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New dictionary column with null rows replaced.
  */
-std::unique_ptr<column> replace_nulls(
-  dictionary_column_view const& input,
-  dictionary_column_view const& replacement,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> replace_nulls(dictionary_column_view const& input,
+                                      dictionary_column_view const& replacement,
+                                      rmm::cuda_stream_view stream,
+                                      rmm::mr::device_memory_resource* mr);
 
 /**
  * @brief Create a new dictionary column by replacing nulls with a
@@ -57,11 +56,10 @@ std::unique_ptr<column> replace_nulls(
  * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New dictionary column with null rows replaced.
  */
-std::unique_ptr<column> replace_nulls(
-  dictionary_column_view const& input,
-  scalar const& replacement,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> replace_nulls(dictionary_column_view const& input,
+                                      scalar const& replacement,
+                                      rmm::cuda_stream_view stream,
+                                      rmm::mr::device_memory_resource* mr);
 
 }  // namespace detail
 }  // namespace dictionary
diff --git a/cpp/include/cudf/dictionary/detail/search.hpp b/cpp/include/cudf/dictionary/detail/search.hpp
index 2d65b561cd3..62059306b9a 100644
--- a/cpp/include/cudf/dictionary/detail/search.hpp
+++ b/cpp/include/cudf/dictionary/detail/search.hpp
@@ -31,11 +31,10 @@ namespace detail {
  *
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
-std::unique_ptr<scalar> get_index(
-  dictionary_column_view const& dictionary,
-  scalar const& key,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<scalar> get_index(dictionary_column_view const& dictionary,
+                                  scalar const& key,
+                                  rmm::cuda_stream_view stream,
+                                  rmm::mr::device_memory_resource* mr);
 
 /**
  * @brief Get the index for a key if it were added to the given dictionary.
@@ -56,11 +55,10 @@ std::unique_ptr<scalar> get_index(
  * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return Numeric scalar index value of the key within the dictionary
  */
-std::unique_ptr<scalar> get_insert_index(
-  dictionary_column_view const& dictionary,
-  scalar const& key,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<scalar> get_insert_index(dictionary_column_view const& dictionary,
+                                         scalar const& key,
+                                         rmm::cuda_stream_view stream,
+                                         rmm::mr::device_memory_resource* mr);
 
 }  // namespace detail
 }  // namespace dictionary
diff --git a/cpp/include/cudf/dictionary/detail/update_keys.hpp b/cpp/include/cudf/dictionary/detail/update_keys.hpp
index 7f78effdd05..6fd743ad526 100644
--- a/cpp/include/cudf/dictionary/detail/update_keys.hpp
+++ b/cpp/include/cudf/dictionary/detail/update_keys.hpp
@@ -32,11 +32,10 @@ namespace detail {
  *
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
-std::unique_ptr<column> add_keys(
-  dictionary_column_view const& dictionary_column,
-  column_view const& new_keys,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> add_keys(dictionary_column_view const& dictionary_column,
+                                 column_view const& new_keys,
+                                 rmm::cuda_stream_view stream,
+                                 rmm::mr::device_memory_resource* mr);
 
 /**
  * @copydoc cudf::dictionary::remove_keys(dictionary_column_view const&,column_view
@@ -44,11 +43,10 @@ std::unique_ptr<column> add_keys(
  *
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
-std::unique_ptr<column> remove_keys(
-  dictionary_column_view const& dictionary_column,
-  column_view const& keys_to_remove,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> remove_keys(dictionary_column_view const& dictionary_column,
+                                    column_view const& keys_to_remove,
+                                    rmm::cuda_stream_view stream,
+                                    rmm::mr::device_memory_resource* mr);
 
 /**
  * @copydoc cudf::dictionary::remove_unused_keys(dictionary_column_view
@@ -56,10 +54,9 @@ std::unique_ptr<column> remove_keys(
  *
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
-std::unique_ptr<column> remove_unused_keys(
-  dictionary_column_view const& dictionary_column,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> remove_unused_keys(dictionary_column_view const& dictionary_column,
+                                           rmm::cuda_stream_view stream,
+                                           rmm::mr::device_memory_resource* mr);
 
 /**
  * @copydoc cudf::dictionary::set_keys(dictionary_column_view
@@ -67,11 +64,10 @@ std::unique_ptr<column> remove_unused_keys(
  *
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
-std::unique_ptr<column> set_keys(
-  dictionary_column_view const& dictionary_column,
-  column_view const& keys,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> set_keys(dictionary_column_view const& dictionary_column,
+                                 column_view const& keys,
+                                 rmm::cuda_stream_view stream,
+                                 rmm::mr::device_memory_resource* mr);
 
 /**
  * @copydoc
@@ -82,7 +78,7 @@ std::unique_ptr<column> set_keys(
 std::vector<std::unique_ptr<column>> match_dictionaries(
   cudf::host_span<dictionary_column_view const> input,
   rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+  rmm::mr::device_memory_resource* mr);
 
 /**
  * @brief Create new dictionaries that have keys merged from dictionary columns
@@ -106,7 +102,7 @@ std::vector<std::unique_ptr<column>> match_dictionaries(
 std::pair<std::vector<std::unique_ptr<column>>, std::vector<table_view>> match_dictionaries(
   std::vector<table_view> tables,
   rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+  rmm::mr::device_memory_resource* mr);
 
 }  // namespace detail
 }  // namespace dictionary
diff --git a/cpp/src/copying/copy_range.cu b/cpp/src/copying/copy_range.cu
index c5fa3a73e1a..dbcae354384 100644
--- a/cpp/src/copying/copy_range.cu
+++ b/cpp/src/copying/copy_range.cu
@@ -172,7 +172,8 @@ std::unique_ptr<cudf::column> out_of_place_copy_range_dispatch::operator()<cudf:
   auto target_matched =
     cudf::dictionary::detail::add_keys(dict_target, dict_source.keys(), stream, mr);
   auto const target_view = cudf::dictionary_column_view(target_matched->view());
-  auto source_matched = cudf::dictionary::detail::set_keys(dict_source, target_view.keys(), stream);
+  auto source_matched    = cudf::dictionary::detail::set_keys(
+    dict_source, target_view.keys(), stream, rmm::mr::get_current_device_resource());
   auto const source_view = cudf::dictionary_column_view(source_matched->view());
 
   // build the new indices by calling in_place_copy_range on just the indices
diff --git a/cpp/src/copying/scatter.cu b/cpp/src/copying/scatter.cu
index 7b6ff80e3e4..4ebe465b945 100644
--- a/cpp/src/copying/scatter.cu
+++ b/cpp/src/copying/scatter.cu
@@ -184,8 +184,9 @@ struct column_scalar_scatterer_impl<dictionary32, MapIterator> {
                                    stream,
                                    mr);
     auto dict_view    = dictionary_column_view(dict_target->view());
-    auto scalar_index = dictionary::detail::get_index(dict_view, source.get(), stream);
-    auto scalar_iter  = thrust::make_permutation_iterator(
+    auto scalar_index = dictionary::detail::get_index(
+      dict_view, source.get(), stream, rmm::mr::get_current_device_resource());
+    auto scalar_iter = thrust::make_permutation_iterator(
       indexalator_factory::make_input_iterator(*scalar_index), thrust::make_constant_iterator(0));
     auto new_indices = std::make_unique<column>(dict_view.get_indices_annotated(), stream, mr);
     auto target_iter = indexalator_factory::make_output_iterator(new_indices->mutable_view());
diff --git a/cpp/src/dictionary/add_keys.cu b/cpp/src/dictionary/add_keys.cu
index 0c4e20aa97f..486e7d2d24b 100644
--- a/cpp/src/dictionary/add_keys.cu
+++ b/cpp/src/dictionary/add_keys.cu
@@ -44,11 +44,10 @@ namespace detail {
  * d2 is now {[a, b, c, d, e, f], [5, 0, 3, 1, 2, 2, 2, 5, 0]}
  * ```
  */
-std::unique_ptr<column> add_keys(
-  dictionary_column_view const& dictionary_column,
-  column_view const& new_keys,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> add_keys(dictionary_column_view const& dictionary_column,
+                                 column_view const& new_keys,
+                                 rmm::cuda_stream_view stream,
+                                 rmm::mr::device_memory_resource* mr)
 {
   CUDF_EXPECTS(!new_keys.has_nulls(), "Keys must not have nulls");
   auto old_keys = dictionary_column.keys();  // [a,b,c,d,f]
diff --git a/cpp/src/dictionary/remove_keys.cu b/cpp/src/dictionary/remove_keys.cu
index 8a703959d9e..dcb877da686 100644
--- a/cpp/src/dictionary/remove_keys.cu
+++ b/cpp/src/dictionary/remove_keys.cu
@@ -56,11 +56,10 @@ namespace {
  * @param mr Device memory resource used to allocate the returned column's device memory.
  */
 template <typename KeysKeeper>
-std::unique_ptr<column> remove_keys_fn(
-  dictionary_column_view const& dictionary_column,
-  KeysKeeper keys_to_keep_fn,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> remove_keys_fn(dictionary_column_view const& dictionary_column,
+                                       KeysKeeper keys_to_keep_fn,
+                                       rmm::cuda_stream_view stream,
+                                       rmm::mr::device_memory_resource* mr)
 {
   auto const keys_view    = dictionary_column.keys();
   auto const indices_type = dictionary_column.indices().type();
@@ -148,11 +147,10 @@ std::unique_ptr<column> remove_keys_fn(
 
 }  // namespace
 
-std::unique_ptr<column> remove_keys(
-  dictionary_column_view const& dictionary_column,
-  column_view const& keys_to_remove,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> remove_keys(dictionary_column_view const& dictionary_column,
+                                    column_view const& keys_to_remove,
+                                    rmm::cuda_stream_view stream,
+                                    rmm::mr::device_memory_resource* mr)
 {
   CUDF_EXPECTS(!keys_to_remove.has_nulls(), "keys_to_remove must not have nulls");
   auto const keys_view = dictionary_column.keys();
@@ -166,10 +164,9 @@ std::unique_ptr<column> remove_keys(
   return remove_keys_fn(dictionary_column, key_matcher, stream, mr);
 }
 
-std::unique_ptr<column> remove_unused_keys(
-  dictionary_column_view const& dictionary_column,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> remove_unused_keys(dictionary_column_view const& dictionary_column,
+                                           rmm::cuda_stream_view stream,
+                                           rmm::mr::device_memory_resource* mr)
 {
   // locate the keys to remove
   auto const keys_size     = dictionary_column.keys_size();
diff --git a/cpp/src/dictionary/replace.cu b/cpp/src/dictionary/replace.cu
index 4acc2d124b2..7069993866c 100644
--- a/cpp/src/dictionary/replace.cu
+++ b/cpp/src/dictionary/replace.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -123,8 +123,9 @@ std::unique_ptr<column> replace_nulls(dictionary_column_view const& input,
   // first add the replacement to the keys so only the indices need to be processed
   auto input_matched = dictionary::detail::add_keys(
     input, make_column_from_scalar(replacement, 1, stream)->view(), stream, mr);
-  auto const input_view   = dictionary_column_view(input_matched->view());
-  auto const scalar_index = get_index(input_view, replacement, stream);
+  auto const input_view = dictionary_column_view(input_matched->view());
+  auto const scalar_index =
+    get_index(input_view, replacement, stream, rmm::mr::get_current_device_resource());
 
   // now build the new indices by doing replace-null on the updated indices
   auto const input_indices = input_view.get_indices_annotated();
diff --git a/cpp/src/dictionary/set_keys.cu b/cpp/src/dictionary/set_keys.cu
index db0c4937582..075fb6115e3 100644
--- a/cpp/src/dictionary/set_keys.cu
+++ b/cpp/src/dictionary/set_keys.cu
@@ -116,11 +116,10 @@ struct dispatch_compute_indices {
 }  // namespace
 
 //
-std::unique_ptr<column> set_keys(
-  dictionary_column_view const& dictionary_column,
-  column_view const& new_keys,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> set_keys(dictionary_column_view const& dictionary_column,
+                                 column_view const& new_keys,
+                                 rmm::cuda_stream_view stream,
+                                 rmm::mr::device_memory_resource* mr)
 {
   CUDF_EXPECTS(!new_keys.has_nulls(), "keys parameter must not have nulls");
   auto keys = dictionary_column.keys();
diff --git a/cpp/src/filling/fill.cu b/cpp/src/filling/fill.cu
index 290fff33cf6..dac36032583 100644
--- a/cpp/src/filling/fill.cu
+++ b/cpp/src/filling/fill.cu
@@ -171,7 +171,8 @@ std::unique_ptr<cudf::column> out_of_place_fill_range_dispatch::operator()<cudf:
     cudf::dictionary_column_view(target_matched->view()).get_indices_annotated();
 
   // get the index of the key just added
-  auto index_of_value = cudf::dictionary::detail::get_index(target_matched->view(), value, stream);
+  auto index_of_value = cudf::dictionary::detail::get_index(
+    target_matched->view(), value, stream, rmm::mr::get_current_device_resource());
   // now call fill using just the indices column and the new index
   auto new_indices =
     cudf::type_dispatcher(target_indices.type(),
diff --git a/cpp/src/replace/clamp.cu b/cpp/src/replace/clamp.cu
index 24822cc6c65..d54ebf25494 100644
--- a/cpp/src/replace/clamp.cu
+++ b/cpp/src/replace/clamp.cu
@@ -300,14 +300,17 @@ std::unique_ptr<column> dispatch_clamp::operator()<cudf::dictionary32>(
     return result;
   }();
   auto matched_view = dictionary_column_view(matched_column->view());
+  auto default_mr   = rmm::mr::get_current_device_resource();
 
   // get the indexes for lo_replace and for hi_replace
-  auto lo_replace_index = dictionary::detail::get_index(matched_view, lo_replace, stream);
-  auto hi_replace_index = dictionary::detail::get_index(matched_view, hi_replace, stream);
+  auto lo_replace_index =
+    dictionary::detail::get_index(matched_view, lo_replace, stream, default_mr);
+  auto hi_replace_index =
+    dictionary::detail::get_index(matched_view, hi_replace, stream, default_mr);
 
   // get the closest indexes for lo and for hi
-  auto lo_index = dictionary::detail::get_insert_index(matched_view, lo, stream);
-  auto hi_index = dictionary::detail::get_insert_index(matched_view, hi, stream);
+  auto lo_index = dictionary::detail::get_insert_index(matched_view, lo, stream, default_mr);
+  auto hi_index = dictionary::detail::get_insert_index(matched_view, hi, stream, default_mr);
 
   // call clamp with the scalar indexes and the matched indices
   auto matched_indices = matched_view.get_indices_annotated();
diff --git a/cpp/src/replace/replace.cu b/cpp/src/replace/replace.cu
index 2a675c00b48..b3ee6e069ed 100644
--- a/cpp/src/replace/replace.cu
+++ b/cpp/src/replace/replace.cu
@@ -457,9 +457,10 @@ std::unique_ptr<cudf::column> replace_kernel_forwarder::operator()<cudf::diction
     return cudf::dictionary::detail::add_keys(input, new_keys->view(), stream, mr);
   }();
   auto matched_view   = cudf::dictionary_column_view(matched_input->view());
-  auto matched_values = cudf::dictionary::detail::set_keys(values, matched_view.keys(), stream);
-  auto matched_replacements =
-    cudf::dictionary::detail::set_keys(replacements, matched_view.keys(), stream);
+  auto matched_values = cudf::dictionary::detail::set_keys(
+    values, matched_view.keys(), stream, rmm::mr::get_current_device_resource());
+  auto matched_replacements = cudf::dictionary::detail::set_keys(
+    replacements, matched_view.keys(), stream, rmm::mr::get_current_device_resource());
 
   auto indices_type = matched_view.indices().type();
   auto new_indices  = cudf::type_dispatcher<cudf::dispatch_storage_type>(
diff --git a/cpp/src/search/contains_column.cu b/cpp/src/search/contains_column.cu
index 31edf88a8cf..08bcf8d48d8 100644
--- a/cpp/src/search/contains_column.cu
+++ b/cpp/src/search/contains_column.cu
@@ -119,9 +119,11 @@ std::unique_ptr<column> contains_column_dispatch::operator()<dictionary32>(
   dictionary_column_view const haystack(haystack_in);
   dictionary_column_view const needles(needles_in);
   // first combine keys so both dictionaries have the same set
-  auto needles_matched     = dictionary::detail::add_keys(needles, haystack.keys(), stream);
-  auto const needles_view  = dictionary_column_view(needles_matched->view());
-  auto haystack_matched    = dictionary::detail::set_keys(haystack, needles_view.keys(), stream);
+  auto needles_matched = dictionary::detail::add_keys(
+    needles, haystack.keys(), stream, rmm::mr::get_current_device_resource());
+  auto const needles_view = dictionary_column_view(needles_matched->view());
+  auto haystack_matched   = dictionary::detail::set_keys(
+    haystack, needles_view.keys(), stream, rmm::mr::get_current_device_resource());
   auto const haystack_view = dictionary_column_view(haystack_matched->view());
 
   // now just use the indices for the contains
diff --git a/cpp/src/search/contains_scalar.cu b/cpp/src/search/contains_scalar.cu
index 59c7a86d29c..8c500e1e757 100644
--- a/cpp/src/search/contains_scalar.cu
+++ b/cpp/src/search/contains_scalar.cu
@@ -128,7 +128,8 @@ bool contains_scalar_dispatch::operator()<cudf::dictionary32>(column_view const&
 {
   auto const dict_col = cudf::dictionary_column_view(haystack);
   // first, find the needle in the dictionary's key set
-  auto const index = cudf::dictionary::detail::get_index(dict_col, needle, stream);
+  auto const index = cudf::dictionary::detail::get_index(
+    dict_col, needle, stream, rmm::mr::get_current_device_resource());
   // if found, check the index is actually in the indices column
   return index->is_valid(stream) && cudf::type_dispatcher(dict_col.indices().type(),
                                                           contains_scalar_dispatch{},
diff --git a/cpp/src/search/search_ordered.cu b/cpp/src/search/search_ordered.cu
index 1da8d2313e6..bf0eb8d46f8 100644
--- a/cpp/src/search/search_ordered.cu
+++ b/cpp/src/search/search_ordered.cu
@@ -61,7 +61,8 @@ std::unique_ptr<column> search_ordered(table_view const& haystack,
 
   // This utility will ensure all corresponding dictionary columns have matching keys.
   // It will return any new dictionary columns created as well as updated table_views.
-  auto const matched = dictionary::detail::match_dictionaries({haystack, needles}, stream);
+  auto const matched = dictionary::detail::match_dictionaries(
+    {haystack, needles}, stream, rmm::mr::get_current_device_resource());
   auto const& matched_haystack = matched.second.front();
   auto const& matched_needles  = matched.second.back();
 
diff --git a/cpp/tests/dictionary/search_test.cpp b/cpp/tests/dictionary/search_test.cpp
index 8b77d71593d..11cafa7dd8e 100644
--- a/cpp/tests/dictionary/search_test.cpp
+++ b/cpp/tests/dictionary/search_test.cpp
@@ -35,8 +35,10 @@ TEST_F(DictionarySearchTest, StringsColumn)
 
   result = cudf::dictionary::get_index(dictionary, cudf::string_scalar("eee"));
   EXPECT_FALSE(result->is_valid());
-  result = cudf::dictionary::detail::get_insert_index(
-    dictionary, cudf::string_scalar("eee"), cudf::get_default_stream());
+  result   = cudf::dictionary::detail::get_insert_index(dictionary,
+                                                      cudf::string_scalar("eee"),
+                                                      cudf::get_default_stream(),
+                                                      rmm::mr::get_current_device_resource());
   n_result = dynamic_cast<cudf::numeric_scalar<uint32_t>*>(result.get());
   EXPECT_EQ(uint32_t{5}, n_result->value());
 }
@@ -52,8 +54,10 @@ TEST_F(DictionarySearchTest, WithNulls)
 
   result = cudf::dictionary::get_index(dictionary, cudf::numeric_scalar<int64_t>(5));
   EXPECT_FALSE(result->is_valid());
-  result = cudf::dictionary::detail::get_insert_index(
-    dictionary, cudf::numeric_scalar<int64_t>(5), cudf::get_default_stream());
+  result   = cudf::dictionary::detail::get_insert_index(dictionary,
+                                                      cudf::numeric_scalar<int64_t>(5),
+                                                      cudf::get_default_stream(),
+                                                      rmm::mr::get_current_device_resource());
   n_result = dynamic_cast<cudf::numeric_scalar<uint32_t>*>(result.get());
   EXPECT_EQ(uint32_t{1}, n_result->value());
 }
@@ -64,7 +68,8 @@ TEST_F(DictionarySearchTest, EmptyColumn)
   cudf::numeric_scalar<int64_t> key(7);
   auto result = cudf::dictionary::get_index(dictionary, key);
   EXPECT_FALSE(result->is_valid());
-  result = cudf::dictionary::detail::get_insert_index(dictionary, key, cudf::get_default_stream());
+  result = cudf::dictionary::detail::get_insert_index(
+    dictionary, key, cudf::get_default_stream(), rmm::mr::get_current_device_resource());
   EXPECT_FALSE(result->is_valid());
 }
 
@@ -74,6 +79,7 @@ TEST_F(DictionarySearchTest, Errors)
   cudf::numeric_scalar<double> key(7);
   EXPECT_THROW(cudf::dictionary::get_index(dictionary, key), cudf::logic_error);
   EXPECT_THROW(
-    cudf::dictionary::detail::get_insert_index(dictionary, key, cudf::get_default_stream()),
+    cudf::dictionary::detail::get_insert_index(
+      dictionary, key, cudf::get_default_stream(), rmm::mr::get_current_device_resource()),
     cudf::logic_error);
 }
diff --git a/cpp/tests/replace/replace_nulls_tests.cpp b/cpp/tests/replace/replace_nulls_tests.cpp
index 2c751a67a63..616ba9d2f64 100644
--- a/cpp/tests/replace/replace_nulls_tests.cpp
+++ b/cpp/tests/replace/replace_nulls_tests.cpp
@@ -21,7 +21,6 @@
 
 #include <tests/groupby/groupby_test_util.hpp>
 
-#include <cudf/dictionary/detail/replace.hpp>
 #include <cudf/dictionary/encode.hpp>
 #include <cudf/fixed_point/fixed_point.hpp>
 #include <cudf/scalar/scalar.hpp>
@@ -679,32 +678,25 @@ TEST_F(ReplaceDictionaryTest, ReplaceNullsError)
   auto input_one  = cudf::dictionary::encode(input_one_w);
   auto dict_input = cudf::dictionary_column_view(input_one->view());
   auto dict_repl  = cudf::dictionary_column_view(replacement->view());
-  EXPECT_THROW(
-    cudf::dictionary::detail::replace_nulls(dict_input, dict_repl, cudf::get_default_stream()),
-    cudf::logic_error);
+  EXPECT_THROW(cudf::replace_nulls(input->view(), replacement->view()), cudf::logic_error);
 }
 
 TEST_F(ReplaceDictionaryTest, ReplaceNullsEmpty)
 {
   cudf::test::fixed_width_column_wrapper<int64_t> input_empty_w({});
   auto input_empty = cudf::dictionary::encode(input_empty_w);
-  auto dict_input  = cudf::dictionary_column_view(input_empty->view());
-  auto result =
-    cudf::dictionary::detail::replace_nulls(dict_input, dict_input, cudf::get_default_stream());
+  auto result      = cudf::replace_nulls(input_empty->view(), input_empty->view());
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), input_empty->view());
 }
 
 TEST_F(ReplaceDictionaryTest, ReplaceNullsNoNulls)
 {
   cudf::test::fixed_width_column_wrapper<int8_t> input_w({1, 1, 1});
-  auto input      = cudf::dictionary::encode(input_w);
-  auto dict_input = cudf::dictionary_column_view(input->view());
-  auto result =
-    cudf::dictionary::detail::replace_nulls(dict_input, dict_input, cudf::get_default_stream());
+  auto input  = cudf::dictionary::encode(input_w);
+  auto result = cudf::replace_nulls(input->view(), input->view());
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), input->view());
 
-  result = cudf::dictionary::detail::replace_nulls(
-    dict_input, cudf::numeric_scalar<int64_t>(0, false), cudf::get_default_stream());
+  result = cudf::replace_nulls(input->view(), cudf::numeric_scalar<int8_t>(0, false));
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), input->view());
 }
 

From 991c86b13acdbc28ab60609bee6eba2f9eac1ecc Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Tue, 1 Nov 2022 10:00:36 -0400
Subject: [PATCH 100/202] Remove default parameters for nvtext::detail
 functions (#12007)

Removes default parameters from the `nvtext::detail` functions. Most of these were internal default parameters which were unnecessary. The nvtext detail functions are only used within nvtext APIs.

Reference #11967

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/12007
---
 cpp/include/nvtext/detail/tokenize.hpp | 38 +++++++++++---------------
 cpp/src/text/generate_ngrams.cu        | 11 ++++----
 cpp/src/text/ngrams_tokenize.cu        | 13 ++++-----
 cpp/src/text/normalize.cu              |  7 ++---
 4 files changed, 30 insertions(+), 39 deletions(-)

diff --git a/cpp/include/nvtext/detail/tokenize.hpp b/cpp/include/nvtext/detail/tokenize.hpp
index 9c1cdbd6310..38b49e63590 100644
--- a/cpp/include/nvtext/detail/tokenize.hpp
+++ b/cpp/include/nvtext/detail/tokenize.hpp
@@ -35,12 +35,10 @@ namespace detail {
  * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New strings columns of tokens.
  */
-std::unique_ptr<cudf::column> tokenize(
-  cudf::strings_column_view const& strings,
-  cudf::string_scalar const& delimiter = cudf::string_scalar{""},
-  // Move before delimiter?
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<cudf::column> tokenize(cudf::strings_column_view const& strings,
+                                       cudf::string_scalar const& delimiter,
+                                       rmm::cuda_stream_view stream,
+                                       rmm::mr::device_memory_resource* mr);
 
 /**
  * @copydoc nvtext::tokenize(strings_column_view const&,strings_column_view
@@ -52,11 +50,10 @@ std::unique_ptr<cudf::column> tokenize(
  * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New strings columns of tokens.
  */
-std::unique_ptr<cudf::column> tokenize(
-  cudf::strings_column_view const& strings,
-  cudf::strings_column_view const& delimiters,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<cudf::column> tokenize(cudf::strings_column_view const& strings,
+                                       cudf::strings_column_view const& delimiters,
+                                       rmm::cuda_stream_view stream,
+                                       rmm::mr::device_memory_resource* mr);
 
 /**
  * @copydoc nvtext::count_tokens(strings_column_view const&, string_scalar
@@ -69,12 +66,10 @@ std::unique_ptr<cudf::column> tokenize(
  * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New INT32 column of token counts.
  */
-std::unique_ptr<cudf::column> count_tokens(
-  cudf::strings_column_view const& strings,
-  cudf::string_scalar const& delimiter = cudf::string_scalar{""},
-  // Move before delimiter?
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<cudf::column> count_tokens(cudf::strings_column_view const& strings,
+                                           cudf::string_scalar const& delimiter,
+                                           rmm::cuda_stream_view stream,
+                                           rmm::mr::device_memory_resource* mr);
 
 /**
  * @copydoc nvtext::count_tokens(strings_column_view const&,strings_column_view
@@ -86,11 +81,10 @@ std::unique_ptr<cudf::column> count_tokens(
  * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New INT32 column of token counts.
  */
-std::unique_ptr<cudf::column> count_tokens(
-  cudf::strings_column_view const& strings,
-  cudf::strings_column_view const& delimiters,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<cudf::column> count_tokens(cudf::strings_column_view const& strings,
+                                           cudf::strings_column_view const& delimiters,
+                                           rmm::cuda_stream_view stream,
+                                           rmm::mr::device_memory_resource* mr);
 
 }  // namespace detail
 }  // namespace nvtext
diff --git a/cpp/src/text/generate_ngrams.cu b/cpp/src/text/generate_ngrams.cu
index d5ff7b99344..be50ece28d5 100644
--- a/cpp/src/text/generate_ngrams.cu
+++ b/cpp/src/text/generate_ngrams.cu
@@ -84,12 +84,11 @@ struct ngram_generator_fn {
 
 }  // namespace
 
-std::unique_ptr<cudf::column> generate_ngrams(
-  cudf::strings_column_view const& strings,
-  cudf::size_type ngrams               = 2,
-  cudf::string_scalar const& separator = cudf::string_scalar{"_"},
-  rmm::cuda_stream_view stream         = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr  = rmm::mr::get_current_device_resource())
+std::unique_ptr<cudf::column> generate_ngrams(cudf::strings_column_view const& strings,
+                                              cudf::size_type ngrams,
+                                              cudf::string_scalar const& separator,
+                                              rmm::cuda_stream_view stream,
+                                              rmm::mr::device_memory_resource* mr)
 {
   CUDF_EXPECTS(separator.is_valid(stream), "Parameter separator must be valid");
   cudf::string_view const d_separator(separator.data(), separator.size());
diff --git a/cpp/src/text/ngrams_tokenize.cu b/cpp/src/text/ngrams_tokenize.cu
index b0071ed9e88..f1ddcfdc6f8 100644
--- a/cpp/src/text/ngrams_tokenize.cu
+++ b/cpp/src/text/ngrams_tokenize.cu
@@ -134,13 +134,12 @@ struct ngram_builder_fn {
 
 // detail APIs
 
-std::unique_ptr<cudf::column> ngrams_tokenize(
-  cudf::strings_column_view const& strings,
-  cudf::size_type ngrams               = 2,
-  cudf::string_scalar const& delimiter = cudf::string_scalar(""),
-  cudf::string_scalar const& separator = cudf::string_scalar{"_"},
-  rmm::cuda_stream_view stream         = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr  = rmm::mr::get_current_device_resource())
+std::unique_ptr<cudf::column> ngrams_tokenize(cudf::strings_column_view const& strings,
+                                              cudf::size_type ngrams,
+                                              cudf::string_scalar const& delimiter,
+                                              cudf::string_scalar const& separator,
+                                              rmm::cuda_stream_view stream,
+                                              rmm::mr::device_memory_resource* mr)
 {
   CUDF_EXPECTS(delimiter.is_valid(stream), "Parameter delimiter must be valid");
   cudf::string_view d_delimiter(delimiter.data(), delimiter.size());
diff --git a/cpp/src/text/normalize.cu b/cpp/src/text/normalize.cu
index 2d5dd0ebbf8..2931370ac02 100644
--- a/cpp/src/text/normalize.cu
+++ b/cpp/src/text/normalize.cu
@@ -170,10 +170,9 @@ struct codepoint_to_utf8_fn {
 }  // namespace
 
 // detail API
-std::unique_ptr<cudf::column> normalize_spaces(
-  cudf::strings_column_view const& strings,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<cudf::column> normalize_spaces(cudf::strings_column_view const& strings,
+                                               rmm::cuda_stream_view stream,
+                                               rmm::mr::device_memory_resource* mr)
 {
   if (strings.is_empty()) return cudf::make_empty_column(cudf::data_type{cudf::type_id::STRING});
 

From 7af461cd0a4b616932e2766050a87d4cc82cd963 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Tue, 1 Nov 2022 10:03:17 -0500
Subject: [PATCH 101/202] Update cuda-python dependency to 11.7.1 (#12030)

This is a mirror PR of https://github.com/rapidsai/cudf/pull/11994 to unblock gpu-ci which is currently blocked.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Ray Douglass (https://github.com/raydouglass)
  - Ashwin Srinath (https://github.com/shwina)
  - Bradley Dice (https://github.com/bdice)
  - Jordan Jacobelli (https://github.com/Ethyling)

Approvers:
  - AJ Schmidt (https://github.com/ajschmidt8)

URL: https://github.com/rapidsai/cudf/pull/12030
---
 README.md                                | 29 ++++++++----------------
 conda/environments/cudf_dev_cuda11.5.yml |  4 ++--
 conda/recipes/cudf/meta.yaml             |  2 +-
 conda/recipes/strings_udf/meta.yaml      |  2 +-
 4 files changed, 13 insertions(+), 24 deletions(-)

diff --git a/README.md b/README.md
index 175f5e7efa8..641ce1316b3 100644
--- a/README.md
+++ b/README.md
@@ -65,32 +65,21 @@ Please see the [Demo Docker Repository](https://hub.docker.com/r/rapidsai/rapids
 
 cuDF can be installed with conda ([miniconda](https://conda.io/miniconda.html), or the full [Anaconda distribution](https://www.anaconda.com/download)) from the `rapidsai` channel:
 
-For `cudf version == 22.06` :
 ```bash
-# for CUDA 11.0
-conda install -c rapidsai -c nvidia -c numba -c conda-forge \
-    cudf=22.06 python=3.9 cudatoolkit=11.0
-
-# or, for CUDA 11.2
-conda install -c rapidsai -c nvidia -c numba -c conda-forge \
-    cudf=22.06 python=3.9 cudatoolkit=11.2
-
+# for CUDA 11.5
+conda install -c rapidsai -c conda-forge -c nvidia \
+    cudf=22.10 python=3.9 cudatoolkit=11.5
+# for CUDA 11.2
+conda install -c rapidsai -c conda-forge -c nvidia \
+    cudf=22.10 python=3.9 cudatoolkit=11.2
 ```
 
-For the nightly version of `cudf` :
-```bash
-# for CUDA 11.0
-conda install -c rapidsai-nightly -c nvidia -c numba -c conda-forge \
-    cudf python=3.9 cudatoolkit=11.0
-
-# or, for CUDA 11.2
-conda install -c rapidsai-nightly -c nvidia -c numba -c conda-forge \
-    cudf python=3.9 cudatoolkit=11.2
-```
+We also provide [nightly Conda packages](https://anaconda.org/rapidsai-nightly) built from the HEAD
+of our latest development branch.
 
 Note: cuDF is supported only on Linux, and with Python versions 3.8 and later.
 
-See the [Get RAPIDS version picker](https://rapids.ai/start.html) for more OS and version info. 
+See the [Get RAPIDS version picker](https://rapids.ai/start.html) for more OS and version info.
 
 ## Build/Install from Source
 See build [instructions](CONTRIBUTING.md#setting-up-your-build-environment).
diff --git a/conda/environments/cudf_dev_cuda11.5.yml b/conda/environments/cudf_dev_cuda11.5.yml
index d7178198358..2cad2002456 100644
--- a/conda/environments/cudf_dev_cuda11.5.yml
+++ b/conda/environments/cudf_dev_cuda11.5.yml
@@ -3,10 +3,10 @@
 name: cudf_dev
 channels:
   - rapidsai
-  - nvidia
   - rapidsai-nightly
   - dask/label/dev
   - conda-forge
+  - nvidia
 dependencies:
   - c-compiler
   - cxx-compiler
@@ -38,7 +38,7 @@ dependencies:
   - ipython
   - pandoc<=2.0.0
   - cudatoolkit=11.5
-  - cuda-python>=11.5,<11.7.1
+  - cuda-python>=11.7.1,<12.0
   - pip
   - doxygen=1.8.20
   - typing_extensions
diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml
index 9b8e379b25e..380b3652fbb 100644
--- a/conda/recipes/cudf/meta.yaml
+++ b/conda/recipes/cudf/meta.yaml
@@ -62,7 +62,7 @@ requirements:
     - packaging
     - cachetools
     - cubinlinker  # [linux64]  # CUDA enhanced compatibility.
-    - cuda-python >=11.5,<11.7.1
+    - cuda-python >=11.7.1,<12.0
 test:                                   # [linux64]
   requires:                             # [linux64]
     - cudatoolkit {{ cuda_version }}.*  # [linux64]
diff --git a/conda/recipes/strings_udf/meta.yaml b/conda/recipes/strings_udf/meta.yaml
index e29fb55ce63..a736edef24d 100644
--- a/conda/recipes/strings_udf/meta.yaml
+++ b/conda/recipes/strings_udf/meta.yaml
@@ -40,7 +40,7 @@ requirements:
     - numba >=0.54
     - libcudf ={{ version }}
     - cudf ={{ version }}
-    - cudatoolkit ={{ cuda_version }}
+    - cudatoolkit {{ cuda_version }}.*
   run:
     - python
     - typing_extensions

From d2367790852905a3daa0a940bb3b3f68c9f0b720 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Tue, 1 Nov 2022 11:22:52 -0500
Subject: [PATCH 102/202] Reduce/Remove reliance on `**kwargs` and `*args` in
 `IO` readers & writers (#12025)

Resolves: #11780

This PR:

- [x] Reduces reliance on `args` & `kwargs` for readers and writers when the `cudf` engine is selected. However, these will have to stay for the sake of the other engines we support in a few readers & writers, such as the `pandas` & `pyarrow` engines.
- [x] Fixes some bugs where dead parameters were still being used.
- [x] Fixes some bugs where parameters weren't being passed down to the Cython layer in the first place.
- [x] Updates docs related to newly exposed parameters.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Lawrence Mitchell (https://github.com/wence-)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/12025
---
 python/cudf/cudf/_lib/orc.pyx            |  23 +-
 python/cudf/cudf/_lib/parquet.pyx        |   6 +-
 python/cudf/cudf/core/dataframe.py       |  70 +++++-
 python/cudf/cudf/io/avro.py              |  16 +-
 python/cudf/cudf/io/csv.py               |   7 +-
 python/cudf/cudf/io/json.py              |  74 +++---
 python/cudf/cudf/io/orc.py               |  19 +-
 python/cudf/cudf/io/parquet.py           | 220 ++++++++++++-----
 python/cudf/cudf/io/text.py              |   4 +-
 python/cudf/cudf/tests/test_orc.py       |  21 +-
 python/cudf/cudf/tests/test_s3.py        |   1 -
 python/cudf/cudf/utils/ioutils.py        | 291 ++++++++++++++++-------
 python/dask_cudf/dask_cudf/io/parquet.py |  41 +++-
 13 files changed, 576 insertions(+), 217 deletions(-)

diff --git a/python/cudf/cudf/_lib/orc.pyx b/python/cudf/cudf/_lib/orc.pyx
index f57e4e8f281..cb364c86dd6 100644
--- a/python/cudf/cudf/_lib/orc.pyx
+++ b/python/cudf/cudf/_lib/orc.pyx
@@ -120,15 +120,15 @@ cpdef read_orc(object filepaths_or_buffers,
         c_result = move(libcudf_read_orc(c_orc_reader_options))
 
     names = [name.decode() for name in c_result.metadata.column_names]
-    actual_index_names, names, is_range_index, reset_index_name, range_idx = \
-        _get_index_from_metadata(c_result.metadata.user_data,
-                                 names,
-                                 skip_rows,
-                                 num_rows)
+    actual_index_names, col_names, is_range_index, reset_index_name, \
+        range_idx = _get_index_from_metadata(c_result.metadata.user_data,
+                                             names,
+                                             skip_rows,
+                                             num_rows)
 
     data, index = data_from_unique_ptr(
         move(c_result.tbl),
-        names,
+        col_names if columns is None else names,
         actual_index_names
     )
 
@@ -238,9 +238,10 @@ cpdef write_orc(table,
                 object stripe_size_bytes=None,
                 object stripe_size_rows=None,
                 object row_index_stride=None,
-                object cols_as_map_type=None):
+                object cols_as_map_type=None,
+                object index=None):
     """
-    Cython function to call into libcudf API, see `write_orc`.
+    Cython function to call into libcudf API, see `cudf::io::write_orc`.
 
     See Also
     --------
@@ -252,10 +253,12 @@ cpdef write_orc(table,
     cdef unique_ptr[table_input_metadata] tbl_meta
     cdef map[string, string] user_data
     user_data[str.encode("pandas")] = str.encode(generate_pandas_metadata(
-        table, None)
+        table, index)
     )
 
-    if not isinstance(table._index, cudf.RangeIndex):
+    if index is True or (
+        index is None and not isinstance(table._index, cudf.RangeIndex)
+    ):
         tv = table_view_from_table(table)
         tbl_meta = make_unique[table_input_metadata](tv)
         for level, idx_name in enumerate(table._index.names):
diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx
index 6de84ce90c3..2667279e205 100644
--- a/python/cudf/cudf/_lib/parquet.pyx
+++ b/python/cudf/cudf/_lib/parquet.pyx
@@ -63,6 +63,8 @@ from cudf._lib.utils cimport table_view_from_table
 
 from pyarrow.lib import NativeFile
 
+from cudf.utils.ioutils import _ROW_GROUP_SIZE_BYTES_DEFAULT
+
 
 cdef class BufferArrayFromVector:
     cdef Py_ssize_t length
@@ -312,7 +314,7 @@ cpdef write_parquet(
         object statistics="ROWGROUP",
         object metadata_file_path=None,
         object int96_timestamps=False,
-        object row_group_size_bytes=None,
+        object row_group_size_bytes=_ROW_GROUP_SIZE_BYTES_DEFAULT,
         object row_group_size_rows=None,
         object max_page_size_bytes=None,
         object max_page_size_rows=None,
@@ -481,7 +483,7 @@ cdef class ParquetWriter:
 
     def __cinit__(self, object filepath_or_buffer, object index=None,
                   object compression="snappy", str statistics="ROWGROUP",
-                  int row_group_size_bytes=134217728,
+                  int row_group_size_bytes=_ROW_GROUP_SIZE_BYTES_DEFAULT,
                   int row_group_size_rows=1000000,
                   int max_page_size_bytes=524288,
                   int max_page_size_rows=20000):
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 92ca5148c1e..82a4a4a8b65 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -6019,11 +6019,51 @@ def select_dtypes(self, include=None, exclude=None):
         return df
 
     @ioutils.doc_to_parquet()
-    def to_parquet(self, path, *args, **kwargs):
+    def to_parquet(
+        self,
+        path,
+        engine="cudf",
+        compression="snappy",
+        index=None,
+        partition_cols=None,
+        partition_file_name=None,
+        partition_offsets=None,
+        statistics="ROWGROUP",
+        metadata_file_path=None,
+        int96_timestamps=False,
+        row_group_size_bytes=ioutils._ROW_GROUP_SIZE_BYTES_DEFAULT,
+        row_group_size_rows=None,
+        max_page_size_bytes=None,
+        max_page_size_rows=None,
+        storage_options=None,
+        return_metadata=False,
+        *args,
+        **kwargs,
+    ):
         """{docstring}"""
         from cudf.io import parquet
 
-        return parquet.to_parquet(self, path, *args, **kwargs)
+        return parquet.to_parquet(
+            self,
+            path=path,
+            engine=engine,
+            compression=compression,
+            index=index,
+            partition_cols=partition_cols,
+            partition_file_name=partition_file_name,
+            partition_offsets=partition_offsets,
+            statistics=statistics,
+            metadata_file_path=metadata_file_path,
+            int96_timestamps=int96_timestamps,
+            row_group_size_bytes=row_group_size_bytes,
+            row_group_size_rows=row_group_size_rows,
+            max_page_size_bytes=max_page_size_bytes,
+            max_page_size_rows=max_page_size_rows,
+            storage_options=storage_options,
+            return_metadata=return_metadata,
+            *args,
+            **kwargs,
+        )
 
     @ioutils.doc_to_feather()
     def to_feather(self, path, *args, **kwargs):
@@ -6066,11 +6106,33 @@ def to_csv(
         )
 
     @ioutils.doc_to_orc()
-    def to_orc(self, fname, compression="snappy", *args, **kwargs):
+    def to_orc(
+        self,
+        fname,
+        compression="snappy",
+        statistics="ROWGROUP",
+        stripe_size_bytes=None,
+        stripe_size_rows=None,
+        row_index_stride=None,
+        cols_as_map_type=None,
+        storage_options=None,
+        index=None,
+    ):
         """{docstring}"""
         from cudf.io import orc
 
-        orc.to_orc(self, fname, compression, *args, **kwargs)
+        return orc.to_orc(
+            df=self,
+            fname=fname,
+            compression=compression,
+            statistics=statistics,
+            stripe_size_bytes=stripe_size_bytes,
+            stripe_size_rows=stripe_size_rows,
+            row_index_stride=row_index_stride,
+            cols_as_map_type=cols_as_map_type,
+            storage_options=storage_options,
+            index=index,
+        )
 
     @_cudf_nvtx_annotate
     def stack(self, level=-1, dropna=True):
diff --git a/python/cudf/cudf/io/avro.py b/python/cudf/cudf/io/avro.py
index 66c5c1c5a56..aaafe60d03f 100644
--- a/python/cudf/cudf/io/avro.py
+++ b/python/cudf/cudf/io/avro.py
@@ -1,4 +1,7 @@
 # Copyright (c) 2019-2022, NVIDIA CORPORATION.
+
+import warnings
+
 import cudf
 from cudf import _lib as libcudf
 from cudf.utils import ioutils
@@ -11,13 +14,13 @@ def read_avro(
     columns=None,
     skiprows=None,
     num_rows=None,
-    **kwargs,
+    storage_options=None,
 ):
     """{docstring}"""
 
     is_single_filepath_or_buffer = ioutils.ensure_single_filepath_or_buffer(
         path_or_data=filepath_or_buffer,
-        **kwargs,
+        storage_options=storage_options,
     )
     if not is_single_filepath_or_buffer:
         raise NotImplementedError(
@@ -25,12 +28,19 @@ def read_avro(
         )
 
     filepath_or_buffer, compression = ioutils.get_reader_filepath_or_buffer(
-        path_or_data=filepath_or_buffer, compression=None, **kwargs
+        path_or_data=filepath_or_buffer,
+        compression=None,
+        storage_options=storage_options,
     )
     if compression is not None:
         ValueError("URL content-encoding decompression is not supported")
 
     if engine == "cudf":
+        warnings.warn(
+            "The `engine` parameter is deprecated and will be removed in a "
+            "future release",
+            FutureWarning,
+        )
         return cudf.DataFrame._from_data(
             *libcudf.avro.read_avro(
                 filepath_or_buffer, columns, skiprows, num_rows
diff --git a/python/cudf/cudf/io/csv.py b/python/cudf/cudf/io/csv.py
index 0adf432c31d..1eacbbb4458 100644
--- a/python/cudf/cudf/io/csv.py
+++ b/python/cudf/cudf/io/csv.py
@@ -61,6 +61,9 @@ def read_csv(
             "`use_python_file_object=False`"
         )
 
+    if bytes_per_thread is None:
+        bytes_per_thread = ioutils._BYTES_PER_THREAD_DEFAULT
+
     is_single_filepath_or_buffer = ioutils.ensure_single_filepath_or_buffer(
         path_or_data=filepath_or_buffer,
         storage_options=storage_options,
@@ -76,9 +79,7 @@ def read_csv(
         iotypes=(BytesIO, StringIO, NativeFile),
         use_python_file_object=use_python_file_object,
         storage_options=storage_options,
-        bytes_per_thread=256_000_000
-        if bytes_per_thread is None
-        else bytes_per_thread,
+        bytes_per_thread=bytes_per_thread,
     )
 
     if na_values is not None and is_scalar(na_values):
diff --git a/python/cudf/cudf/io/json.py b/python/cudf/cudf/io/json.py
index 2a0ae565974..0ae02dcb62b 100644
--- a/python/cudf/cudf/io/json.py
+++ b/python/cudf/cudf/io/json.py
@@ -1,4 +1,5 @@
 # Copyright (c) 2019-2022, NVIDIA CORPORATION.
+
 import warnings
 from collections import abc
 from io import BytesIO, StringIO
@@ -17,22 +18,23 @@
 def read_json(
     path_or_buf,
     engine="auto",
-    dtype=True,
+    orient=None,
+    dtype=None,
     lines=False,
     compression="infer",
     byte_range=None,
     keep_quotes=False,
+    storage_options=None,
     *args,
     **kwargs,
 ):
     """{docstring}"""
 
-    if not isinstance(dtype, (abc.Mapping, bool)):
-        warnings.warn(
-            "passing 'dtype' as list is deprecated, instead pass "
-            "a dict of column name and types key-value paris."
-            "in future versions 'dtype' can only be a dict or bool",
-            FutureWarning,
+    if dtype is not None and not isinstance(dtype, (abc.Mapping, bool)):
+        raise TypeError(
+            "'dtype' parameter only supports "
+            "a dict of column names and types as key-value pairs, "
+            f"or a bool, or None. Got {type(dtype)}"
         )
 
     if engine == "cudf" and not lines:
@@ -45,6 +47,20 @@ def read_json(
     if engine == "auto":
         engine = "cudf" if lines else "pandas"
     if engine == "cudf" or engine == "cudf_experimental":
+        if dtype is None:
+            dtype = True
+
+        if kwargs:
+            raise ValueError(
+                "cudf engine doesn't support the "
+                f"following keyword arguments: {list(kwargs.keys())}"
+            )
+        if args:
+            raise ValueError(
+                "cudf engine doesn't support the "
+                f"following positional arguments: {list(args)}"
+            )
+
         # Multiple sources are passed as a list. If a single source is passed,
         # wrap it in a list for unified processing downstream.
         if not is_list_like(path_or_buf):
@@ -52,9 +68,13 @@ def read_json(
 
         filepaths_or_buffers = []
         for source in path_or_buf:
-            if ioutils.is_directory(source, **kwargs):
+            if ioutils.is_directory(
+                path_or_data=source, storage_options=storage_options
+            ):
                 fs = ioutils._ensure_filesystem(
-                    passed_filesystem=None, path=source, **kwargs
+                    passed_filesystem=None,
+                    path=source,
+                    storage_options=storage_options,
                 )
                 source = ioutils.stringify_pathlike(source)
                 source = fs.sep.join([source, "*.json"])
@@ -64,7 +84,7 @@ def read_json(
                 compression=compression,
                 iotypes=(BytesIO, StringIO),
                 allow_raw_text_input=True,
-                **kwargs,
+                storage_options=storage_options,
             )
             if isinstance(tmp_source, list):
                 filepaths_or_buffers.extend(tmp_source)
@@ -88,7 +108,7 @@ def read_json(
 
         if not ioutils.ensure_single_filepath_or_buffer(
             path_or_data=path_or_buf,
-            **kwargs,
+            storage_options=storage_options,
         ):
             raise NotImplementedError(
                 "`read_json` does not yet support reading "
@@ -100,28 +120,24 @@ def read_json(
             compression=compression,
             iotypes=(BytesIO, StringIO),
             allow_raw_text_input=True,
-            **kwargs,
+            storage_options=storage_options,
         )
 
-        if kwargs.get("orient") == "table":
-            pd_value = pd.read_json(
-                path_or_buf,
-                lines=lines,
-                compression=compression,
-                *args,
-                **kwargs,
-            )
-        else:
-            pd_value = pd.read_json(
-                path_or_buf,
-                lines=lines,
-                dtype=dtype,
-                compression=compression,
-                *args,
-                **kwargs,
-            )
+        pd_value = pd.read_json(
+            path_or_buf,
+            lines=lines,
+            dtype=dtype,
+            compression=compression,
+            storage_options=storage_options,
+            orient=orient,
+            *args,
+            **kwargs,
+        )
         df = cudf.from_pandas(pd_value)
 
+    if dtype is None:
+        dtype = True
+
     if dtype is True or isinstance(dtype, abc.Mapping):
         # There exists some dtypes in the result columns that is inferred.
         # Find them and map them to the default dtypes.
diff --git a/python/cudf/cudf/io/orc.py b/python/cudf/cudf/io/orc.py
index b9ce07466e5..8865bdd9d33 100644
--- a/python/cudf/cudf/io/orc.py
+++ b/python/cudf/cudf/io/orc.py
@@ -289,7 +289,8 @@ def read_orc(
     use_index=True,
     timestamp_type=None,
     use_python_file_object=True,
-    **kwargs,
+    storage_options=None,
+    bytes_per_thread=None,
 ):
     """{docstring}"""
     from cudf import DataFrame
@@ -326,11 +327,13 @@ def read_orc(
 
     filepaths_or_buffers = []
     for source in filepath_or_buffer:
-        if ioutils.is_directory(source, **kwargs):
+        if ioutils.is_directory(
+            path_or_data=source, storage_options=storage_options
+        ):
             fs = ioutils._ensure_filesystem(
                 passed_filesystem=None,
                 path=source,
-                **kwargs,
+                storage_options=storage_options,
             )
             source = stringify_path(source)
             source = fs.sep.join([source, "*.orc"])
@@ -339,7 +342,8 @@ def read_orc(
             path_or_data=source,
             compression=None,
             use_python_file_object=use_python_file_object,
-            **kwargs,
+            storage_options=storage_options,
+            bytes_per_thread=bytes_per_thread,
         )
         if compression is not None:
             raise ValueError(
@@ -413,7 +417,8 @@ def to_orc(
     stripe_size_rows=None,
     row_index_stride=None,
     cols_as_map_type=None,
-    **kwargs,
+    storage_options=None,
+    index=None,
 ):
     """{docstring}"""
 
@@ -434,7 +439,7 @@ def to_orc(
         raise TypeError("cols_as_map_type must be a list of column names.")
 
     path_or_buf = ioutils.get_writer_filepath_or_buffer(
-        path_or_data=fname, mode="wb", **kwargs
+        path_or_data=fname, mode="wb", storage_options=storage_options
     )
     if ioutils.is_fsspec_open_file(path_or_buf):
         with path_or_buf as file_obj:
@@ -448,6 +453,7 @@ def to_orc(
                 stripe_size_rows,
                 row_index_stride,
                 cols_as_map_type,
+                index,
             )
     else:
         liborc.write_orc(
@@ -459,6 +465,7 @@ def to_orc(
             stripe_size_rows,
             row_index_stride,
             cols_as_map_type,
+            index,
         )
 
 
diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py
index 7ac391c5f3d..ceb08cb8058 100644
--- a/python/cudf/cudf/io/parquet.py
+++ b/python/cudf/cudf/io/parquet.py
@@ -9,7 +9,6 @@
 from typing import Dict, List, Tuple
 from uuid import uuid4
 
-import numpy as np
 from pyarrow import dataset as ds, parquet as pq
 
 import cudf
@@ -54,12 +53,12 @@ def _write_parquet(
     statistics="ROWGROUP",
     metadata_file_path=None,
     int96_timestamps=False,
-    row_group_size_bytes=None,
+    row_group_size_bytes=ioutils._ROW_GROUP_SIZE_BYTES_DEFAULT,
     row_group_size_rows=None,
     max_page_size_bytes=None,
     max_page_size_rows=None,
     partitions_info=None,
-    **kwargs,
+    storage_options=None,
 ):
     if is_list_like(paths) and len(paths) > 1:
         if partitions_info is None:
@@ -73,7 +72,9 @@ def _write_parquet(
             ValueError("paths must be list-like when partitions_info provided")
 
     paths_or_bufs = [
-        ioutils.get_writer_filepath_or_buffer(path, mode="wb", **kwargs)
+        ioutils.get_writer_filepath_or_buffer(
+            path_or_data=path, mode="wb", storage_options=storage_options
+        )
         for path in paths
     ]
     common_args = {
@@ -111,12 +112,19 @@ def _write_parquet(
 def write_to_dataset(
     df,
     root_path,
+    compression="snappy",
     filename=None,
     partition_cols=None,
     fs=None,
     preserve_index=False,
     return_metadata=False,
-    **kwargs,
+    statistics="ROWGROUP",
+    int96_timestamps=False,
+    row_group_size_bytes=ioutils._ROW_GROUP_SIZE_BYTES_DEFAULT,
+    row_group_size_rows=None,
+    max_page_size_bytes=None,
+    max_page_size_rows=None,
+    storage_options=None,
 ):
     """Wraps `to_parquet` to write partitioned Parquet datasets.
     For each combination of partition group and value,
@@ -136,25 +144,51 @@ def write_to_dataset(
     df : cudf.DataFrame
     root_path : string,
         The root directory of the dataset
+    compression : {'snappy', 'ZSTD', None}, default 'snappy'
+        Name of the compression to use. Use ``None`` for no compression.
     filename : string, default None
         The file name to use (within each partition directory). If None,
         a random uuid4 hex string will be used for each file name.
+    partition_cols : list, default None
+        Column names by which to partition the dataset.
+        Columns are partitioned in the order they are given.
     fs : FileSystem, default None
         If nothing passed, paths assumed to be found in the local on-disk
         filesystem
     preserve_index : bool, default False
         Preserve index values in each parquet file.
-    partition_cols : list,
-        Column names by which to partition the dataset
-        Columns are partitioned in the order they are given
     return_metadata : bool, default False
         Return parquet metadata for written data. Returned metadata will
         include the file-path metadata (relative to `root_path`).
-    **kwargs : dict,
-        kwargs for to_parquet function.
+    int96_timestamps : bool, default False
+        If ``True``, write timestamps in int96 format. This will convert
+        timestamps from timestamp[ns], timestamp[ms], timestamp[s], and
+        timestamp[us] to the int96 format, which is the number of Julian
+        days and the number of nanoseconds since midnight of 1970-01-01.
+        If ``False``, timestamps will not be altered.
+    row_group_size_bytes: integer, default 134217728 (128MB)
+        Maximum size of each row group of the output.
+        If None, 134217728 (128MB) will be used.
+    row_group_size_rows: integer or None, default None
+        Maximum number of rows of each row group of the output.
+        If None, 1000000 will be used.
+    max_page_size_bytes: integer or None, default None
+        Maximum uncompressed size of each page of the output.
+        If None, 524288 (512KB) will be used.
+    max_page_size_rows: integer or None, default None
+        Maximum number of rows of each page of the output.
+        If None, 20000 will be used.
+
+    storage_options : dict, optional, default None
+        Extra options that make sense for a particular storage connection,
+        e.g. host, port, username, password, etc. For HTTP(S) URLs the
+        key-value pairs are forwarded to ``urllib.request.Request`` as
+        header options. For other URLs (e.g. starting with "s3://", and
+        "gcs://") the key-value pairs are forwarded to ``fsspec.open``.
+        Please see ``fsspec`` and ``urllib`` for more details.
     """
 
-    fs = ioutils._ensure_filesystem(fs, root_path, **kwargs)
+    fs = ioutils._ensure_filesystem(fs, root_path, storage_options)
     fs.mkdirs(root_path, exist_ok=True)
 
     if partition_cols is not None and len(partition_cols) > 0:
@@ -166,31 +200,50 @@ def write_to_dataset(
             part_offsets,
             _,
         ) = _get_partitioned(
-            df,
-            root_path,
-            partition_cols,
-            filename,
-            fs,
-            preserve_index,
-            **kwargs,
+            df=df,
+            root_path=root_path,
+            partition_cols=partition_cols,
+            filename=filename,
+            fs=fs,
+            preserve_index=preserve_index,
+            storage_options=storage_options,
         )
-
-        if return_metadata:
-            kwargs["metadata_file_path"] = metadata_file_paths
+        metadata_file_path = metadata_file_paths if return_metadata else None
         metadata = to_parquet(
-            grouped_df,
-            full_paths,
+            df=grouped_df,
+            path=full_paths,
+            compression=compression,
             index=preserve_index,
             partition_offsets=part_offsets,
-            **kwargs,
+            storage_options=storage_options,
+            metadata_file_path=metadata_file_path,
+            statistics=statistics,
+            int96_timestamps=int96_timestamps,
+            row_group_size_bytes=row_group_size_bytes,
+            row_group_size_rows=row_group_size_rows,
+            max_page_size_bytes=max_page_size_bytes,
+            max_page_size_rows=max_page_size_rows,
         )
 
     else:
         filename = filename or _generate_filename()
         full_path = fs.sep.join([root_path, filename])
-        if return_metadata:
-            kwargs["metadata_file_path"] = filename
-        metadata = df.to_parquet(full_path, index=preserve_index, **kwargs)
+
+        metadata_file_path = filename if return_metadata else None
+
+        metadata = df.to_parquet(
+            path=full_path,
+            compression=compression,
+            index=preserve_index,
+            storage_options=storage_options,
+            metadata_file_path=metadata_file_path,
+            statistics=statistics,
+            int96_timestamps=int96_timestamps,
+            row_group_size_bytes=row_group_size_bytes,
+            row_group_size_rows=row_group_size_rows,
+            max_page_size_bytes=max_page_size_bytes,
+            max_page_size_rows=max_page_size_rows,
+        )
 
     return metadata
 
@@ -361,6 +414,7 @@ def read_parquet(
     filepath_or_buffer,
     engine="cudf",
     columns=None,
+    storage_options=None,
     filters=None,
     row_groups=None,
     strings_to_categorical=False,
@@ -368,6 +422,7 @@ def read_parquet(
     use_python_file_object=True,
     categorical_partitions=True,
     open_file_options=None,
+    bytes_per_thread=None,
     *args,
     **kwargs,
 ):
@@ -383,6 +438,9 @@ def read_parquet(
             )
         open_file_options = {}
 
+    if bytes_per_thread is None:
+        bytes_per_thread = ioutils._BYTES_PER_THREAD_DEFAULT
+
     # Multiple sources are passed as a list. If a single source is passed,
     # wrap it in a list for unified processing downstream.
     if not is_list_like(filepath_or_buffer):
@@ -403,7 +461,9 @@ def read_parquet(
 
     # Start by trying construct a filesystem object, so we
     # can apply filters on remote file-systems
-    fs, paths = ioutils._get_filesystem_and_paths(filepath_or_buffer, **kwargs)
+    fs, paths = ioutils._get_filesystem_and_paths(
+        path_or_data=filepath_or_buffer, storage_options=storage_options
+    )
 
     # Use pyarrow dataset to detect/process directory-partitioned
     # data and apply filters. Note that we can only support partitioned
@@ -418,8 +478,8 @@ def read_parquet(
             partition_keys,
             partition_categories,
         ) = _process_dataset(
-            paths,
-            fs,
+            paths=paths,
+            fs=fs,
             filters=filters,
             row_groups=row_groups,
             categorical_partitions=categorical_partitions,
@@ -431,19 +491,20 @@ def read_parquet(
     filepaths_or_buffers = []
     if use_python_file_object:
         open_file_options = _default_open_file_options(
-            open_file_options,
-            columns,
-            row_groups,
+            open_file_options=open_file_options,
+            columns=columns,
+            row_groups=row_groups,
             fs=fs,
         )
-    for i, source in enumerate(filepath_or_buffer):
+    for source in filepath_or_buffer:
         tmp_source, compression = ioutils.get_reader_filepath_or_buffer(
             path_or_data=source,
             compression=None,
             fs=fs,
             use_python_file_object=use_python_file_object,
             open_file_options=open_file_options,
-            **kwargs,
+            storage_options=storage_options,
+            bytes_per_thread=bytes_per_thread,
         )
 
         if compression is not None:
@@ -571,6 +632,16 @@ def _read_parquet(
     # Simple helper function to dispatch between
     # cudf and pyarrow to read parquet data
     if engine == "cudf":
+        if kwargs:
+            raise ValueError(
+                "cudf engine doesn't support the "
+                f"following keyword arguments: {list(kwargs.keys())}"
+            )
+        if args:
+            raise ValueError(
+                "cudf engine doesn't support the "
+                f"following positional arguments: {list(args)}"
+            )
         return libparquet.read_parquet(
             filepaths_or_buffers,
             columns=columns,
@@ -600,16 +671,28 @@ def to_parquet(
     statistics="ROWGROUP",
     metadata_file_path=None,
     int96_timestamps=False,
-    row_group_size_bytes=None,
+    row_group_size_bytes=ioutils._ROW_GROUP_SIZE_BYTES_DEFAULT,
     row_group_size_rows=None,
     max_page_size_bytes=None,
     max_page_size_rows=None,
+    storage_options=None,
+    return_metadata=False,
     *args,
     **kwargs,
 ):
     """{docstring}"""
 
     if engine == "cudf":
+        if kwargs:
+            raise ValueError(
+                "cudf engine doesn't support the "
+                f"following keyword arguments: {list(kwargs.keys())}"
+            )
+        if args:
+            raise ValueError(
+                "cudf engine doesn't support the "
+                f"following positional arguments: {list(args)}"
+            )
         # Ensure that no columns dtype is 'category'
         for col in df._column_names:
             if partition_cols is None or col not in partition_cols:
@@ -626,34 +709,32 @@ def to_parquet(
                     "partition_cols are provided. To request returning the "
                     "metadata binary blob, pass `return_metadata=True`"
                 )
-            kwargs.update(
-                {
-                    "compression": compression,
-                    "statistics": statistics,
-                    "int96_timestamps": int96_timestamps,
-                    "row_group_size_bytes": row_group_size_bytes,
-                    "row_group_size_rows": row_group_size_rows,
-                    "max_page_size_bytes": max_page_size_bytes,
-                    "max_page_size_rows": max_page_size_rows,
-                }
-            )
+
             return write_to_dataset(
                 df,
                 filename=partition_file_name,
                 partition_cols=partition_cols,
                 root_path=path,
                 preserve_index=index,
-                **kwargs,
+                compression=compression,
+                statistics=statistics,
+                int96_timestamps=int96_timestamps,
+                row_group_size_bytes=row_group_size_bytes,
+                row_group_size_rows=row_group_size_rows,
+                max_page_size_bytes=max_page_size_bytes,
+                max_page_size_rows=max_page_size_rows,
+                return_metadata=return_metadata,
+                storage_options=storage_options,
             )
 
-        if partition_offsets:
-            kwargs["partitions_info"] = list(
-                zip(
-                    partition_offsets,
-                    np.roll(partition_offsets, -1) - partition_offsets,
-                )
-            )[:-1]
-
+        partition_info = (
+            [
+                (i, j - i)
+                for i, j in zip(partition_offsets, partition_offsets[1:])
+            ]
+            if partition_offsets is not None
+            else None
+        )
         return _write_parquet(
             df,
             paths=path if is_list_like(path) else [path],
@@ -666,7 +747,8 @@ def to_parquet(
             row_group_size_rows=row_group_size_rows,
             max_page_size_bytes=max_page_size_bytes,
             max_page_size_rows=max_page_size_rows,
-            **kwargs,
+            partitions_info=partition_info,
+            storage_options=storage_options,
         )
 
     else:
@@ -730,9 +812,11 @@ def _get_partitioned(
     filename=None,
     fs=None,
     preserve_index=False,
-    **kwargs,
+    storage_options=None,
 ):
-    fs = ioutils._ensure_filesystem(fs, root_path, **kwargs)
+    fs = ioutils._ensure_filesystem(
+        fs, root_path, storage_options=storage_options
+    )
     fs.mkdirs(root_path, exist_ok=True)
 
     part_names, grouped_df, part_offsets = _get_groups_and_offsets(
@@ -872,6 +956,13 @@ class ParquetDatasetWriter:
     file_name_prefix : str
         This is a prefix to file names generated only when
         `max_file_size` is specified.
+    storage_options : dict, optional, default None
+        Extra options that make sense for a particular storage connection,
+        e.g. host, port, username, password, etc. For HTTP(S) URLs the
+        key-value pairs are forwarded to ``urllib.request.Request`` as
+        header options. For other URLs (e.g. starting with "s3://", and
+        "gcs://") the key-value pairs are forwarded to ``fsspec.open``.
+        Please see ``fsspec`` and ``urllib`` for more details.
 
 
     Examples
@@ -915,7 +1006,7 @@ def __init__(
         statistics="ROWGROUP",
         max_file_size=None,
         file_name_prefix=None,
-        **kwargs,
+        storage_options=None,
     ) -> None:
         if isinstance(path, str) and path.startswith("s3://"):
             self.fs_meta = {"is_s3": True, "actual_path": path}
@@ -938,7 +1029,7 @@ def __init__(
         # Map of partition_col values to their ParquetWriter's index
         # in self._chunked_writers for reverse lookup
         self.path_cw_map: Dict[str, int] = {}
-        self.kwargs = kwargs
+        self.storage_options = storage_options
         self.filename = file_name_prefix
         self.max_file_size = max_file_size
         if max_file_size is not None:
@@ -961,7 +1052,7 @@ def write_table(self, df):
             partition_cols=self.partition_cols,
             preserve_index=self.common_args["index"],
         )
-        fs = ioutils._ensure_filesystem(None, self.path)
+        fs = ioutils._ensure_filesystem(None, self.path, None)
         fs.mkdirs(self.path, exist_ok=True)
 
         full_paths = []
@@ -1044,10 +1135,11 @@ def write_table(self, df):
         )
         existing_cw_batch = defaultdict(dict)
         new_cw_paths = []
+        partition_info = [(i, j - i) for i, j in zip(offsets, offsets[1:])]
 
         for path, part_info, meta_path in zip(
             paths,
-            zip(offsets, np.roll(offsets, -1) - offsets),
+            partition_info,
             metadata_file_paths,
         ):
             if path in self.path_cw_map:  # path is a currently open file
@@ -1097,7 +1189,7 @@ def close(self, return_metadata=False):
             local_path = self.path
             s3_path = self.fs_meta["actual_path"]
             s3_file, _ = ioutils._get_filesystem_and_paths(
-                s3_path, **self.kwargs
+                s3_path, storage_options=self.storage_options
             )
             s3_file.put(local_path, s3_path, recursive=True)
             shutil.rmtree(self.path)
diff --git a/python/cudf/cudf/io/text.py b/python/cudf/cudf/io/text.py
index f341edbf6c1..eb2c7fa7ef6 100644
--- a/python/cudf/cudf/io/text.py
+++ b/python/cudf/cudf/io/text.py
@@ -17,7 +17,7 @@ def read_text(
     strip_delimiters=False,
     compression=None,
     compression_offsets=None,
-    **kwargs,
+    storage_options=None,
 ):
     """{docstring}"""
 
@@ -28,7 +28,7 @@ def read_text(
         path_or_data=filepath_or_buffer,
         compression=None,
         iotypes=(BytesIO, StringIO),
-        **kwargs,
+        storage_options=storage_options,
     )
 
     return cudf.Series._from_data(
diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py
index 5aa049db31a..fbd9b83330e 100644
--- a/python/cudf/cudf/tests/test_orc.py
+++ b/python/cudf/cudf/tests/test_orc.py
@@ -1759,11 +1759,26 @@ def test_orc_writer_nvcomp(compression):
         assert_eq(expected, got)
 
 
+@pytest.mark.parametrize("index_obj", [None, [10, 11, 12], ["x", "y", "z"]])
 @pytest.mark.parametrize("index", [True, False, None])
-@pytest.mark.parametrize("columns", [None, [], ["b", "a"]])
-def test_orc_columns_and_index_param(index, columns):
+@pytest.mark.parametrize(
+    "columns",
+    [
+        None,
+        [],
+        pytest.param(
+            ["b", "a"],
+            marks=pytest.mark.xfail(
+                reason="https://github.com/rapidsai/cudf/issues/12026"
+            ),
+        ),
+    ],
+)
+def test_orc_columns_and_index_param(index_obj, index, columns):
     buffer = BytesIO()
-    df = cudf.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]})
+    df = cudf.DataFrame(
+        {"a": [1, 2, 3], "b": ["a", "b", "c"]}, index=index_obj
+    )
     df.to_orc(buffer, index=index)
 
     expected = pd.read_orc(buffer, columns=columns)
diff --git a/python/cudf/cudf/tests/test_s3.py b/python/cudf/cudf/tests/test_s3.py
index 5c06dea4ca6..d2339930b91 100644
--- a/python/cudf/cudf/tests/test_s3.py
+++ b/python/cudf/cudf/tests/test_s3.py
@@ -298,7 +298,6 @@ def test_read_parquet_ext(
             f"s3://{bucket}/{fname}",
             storage_options=s3so,
             bytes_per_thread=bytes_per_thread,
-            footer_sample_size=3200,
             columns=columns,
         )
     if index:
diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py
index 5298e470a91..ebb73ba0ca6 100644
--- a/python/cudf/cudf/utils/ioutils.py
+++ b/python/cudf/cudf/utils/ioutils.py
@@ -25,6 +25,9 @@
     fsspec_parquet = None
 
 
+_BYTES_PER_THREAD_DEFAULT = 256 * 1024 * 1024
+_ROW_GROUP_SIZE_BYTES_DEFAULT = 128 * 1024 * 1024
+
 _docstring_remote_sources = """
 - cuDF supports local and remote data stores. See configuration details for
   available sources
@@ -43,12 +46,20 @@
     (such as builtin `open()` file handler function or `BytesIO`).
 engine : ['cudf'], default 'cudf'
     Parser engine to use.
+    This parameter is deprecated.
 columns : list, default None
     If not None, only these columns will be read.
 skiprows : int, default None
     If not None, the number of rows to skip from the start of the file.
 num_rows : int, default None
     If not None, the total number of rows to read.
+storage_options : dict, optional, default None
+    Extra options that make sense for a particular storage connection,
+    e.g. host, port, username, password, etc. For HTTP(S) URLs the key-value
+    pairs are forwarded to ``urllib.request.Request`` as header options.
+    For other URLs (e.g. starting with "s3://", and "gcs://") the key-value
+    pairs are forwarded to ``fsspec.open``. Please see ``fsspec`` and
+    ``urllib`` for more details.
 
 Returns
 -------
@@ -132,6 +143,13 @@
     Parser engine to use.
 columns : list, default None
     If not None, only these columns will be read.
+storage_options : dict, optional, default None
+    Extra options that make sense for a particular storage connection,
+    e.g. host, port, username, password, etc. For HTTP(S) URLs the key-value
+    pairs are forwarded to ``urllib.request.Request`` as header options.
+    For other URLs (e.g. starting with "s3://", and "gcs://") the key-value
+    pairs are forwarded to ``fsspec.open``. Please see ``fsspec`` and
+    ``urllib`` for more details.
 filters : list of tuple, list of lists of tuples default None
     If not None, specifies a filter predicate used to filter out row groups
     using statistics stored for each row group as Parquet metadata. Row groups
@@ -170,6 +188,13 @@
     deactivate optimized precaching, set the "method" to `None` under the
     "precache_options" key. Note that the `open_file_func` key can also be
     used to specify a custom file-open function.
+bytes_per_thread : int, default None
+    Determines the number of bytes to be allocated per thread to read the
+    files in parallel. When there is a file of large size, we get slightly
+    better throughput by decomposing it and transferring multiple "blocks"
+    in parallel (using a python thread pool). Default allocation is
+    {bytes_per_thread} bytes.
+    This parameter is functional only when `use_python_file_object=False`.
 
 Returns
 -------
@@ -195,7 +220,8 @@
 cudf.DataFrame.to_parquet
 cudf.read_orc
 """.format(
-    remote_data_sources=_docstring_remote_sources
+    remote_data_sources=_docstring_remote_sources,
+    bytes_per_thread=_BYTES_PER_THREAD_DEFAULT,
 )
 doc_read_parquet = docfmt_partial(docstring=_docstring_read_parquet)
 
@@ -208,14 +234,15 @@
     File path or Root Directory path. Will be used as Root Directory path
     while writing a partitioned dataset. Use list of str with partition_offsets
     to write parts of the dataframe to different files.
-compression : {'snappy', 'ZSTD', None}, default 'snappy'
+compression : {{'snappy', 'ZSTD', None}}, default 'snappy'
     Name of the compression to use. Use ``None`` for no compression.
 index : bool, default None
-    If ``True``, include the dataframe's index(es) in the file output. If
-    ``False``, they will not be written to the file. If ``None``, the
-    engine's default behavior will be used. However, instead of being saved
-    as values, the ``RangeIndex`` will be stored as a range in the metadata
-    so it doesn’t require much space and is faster. Other indexes will
+    If ``True``, include the dataframe's index(es) in the file output.
+    If ``False``, they will not be written to the file.
+    If ``None``, the dataframe's index(es) will be saved as with
+    ``True``; however, instead of being saved as values, any
+    ``RangeIndex`` will be stored as a range in the metadata so it
+    doesn't require much space and is faster. Other indexes will
     be included as columns in the file output.
 partition_cols : list, optional, default None
     Column names by which to partition the dataset
@@ -228,7 +255,7 @@
 partition_offsets : list, optional, default None
     Offsets to partition the dataframe by. Should be used when path is list
     of str. Should be a list of integers of size ``len(path) + 1``
-statistics : {'ROWGROUP', 'PAGE', 'COLUMN', 'NONE'}, default 'ROWGROUP'
+statistics : {{'ROWGROUP', 'PAGE', 'COLUMN', 'NONE'}}, default 'ROWGROUP'
     Level at which column statistics should be included in file.
 metadata_file_path : str, optional, default None
     If specified, this function will return a binary blob containing the footer
@@ -239,11 +266,12 @@
     If ``True``, write timestamps in int96 format. This will convert
     timestamps from timestamp[ns], timestamp[ms], timestamp[s], and
     timestamp[us] to the int96 format, which is the number of Julian
-    days and the number of nanoseconds since midnight. If ``False``,
-    timestamps will not be altered.
-row_group_size_bytes: integer or None, default None
+    days and the number of nanoseconds since midnight of 1970-01-01.
+    If ``False``, timestamps will not be altered.
+row_group_size_bytes: integer, default {row_group_size_bytes_val}
     Maximum size of each stripe of the output.
-    If None, 134217728 (128MB) will be used.
+    If None, {row_group_size_bytes_val}
+    ({row_group_size_bytes_val_in_mb} MB) will be used.
 row_group_size_rows: integer or None, default None
     Maximum number of rows of each stripe of the output.
     If None, 1000000 will be used.
@@ -253,15 +281,30 @@
 max_page_size_rows: integer or None, default None
     Maximum number of rows of each page of the output.
     If None, 20000 will be used.
-**kwargs
+storage_options : dict, optional, default None
+    Extra options that make sense for a particular storage connection,
+    e.g. host, port, username, password, etc. For HTTP(S) URLs the key-value
+    pairs are forwarded to ``urllib.request.Request`` as header options.
+    For other URLs (e.g. starting with "s3://", and "gcs://") the key-value
+    pairs are forwarded to ``fsspec.open``. Please see ``fsspec`` and
+    ``urllib`` for more details.
+return_metadata : bool, default False
+    Return parquet metadata for written data. Returned metadata will
+    include the file path metadata (relative to `root_path`).
     To request metadata binary blob when using with ``partition_cols``, Pass
     ``return_metadata=True`` instead of specifying ``metadata_file_path``
+**kwargs
+    Additional parameters will be passed to execution engines other
+    than ``cudf``.
 
 
 See Also
 --------
 cudf.read_parquet
-"""
+""".format(
+    row_group_size_bytes_val=_ROW_GROUP_SIZE_BYTES_DEFAULT,
+    row_group_size_bytes_val_in_mb=_ROW_GROUP_SIZE_BYTES_DEFAULT / 1024 / 1024,
+)
 doc_to_parquet = docfmt_partial(docstring=_docstring_to_parquet)
 
 _docstring_merge_parquet_filemetadata = """
@@ -392,7 +435,20 @@
     If True, Arrow-backed PythonFile objects will be used in place of fsspec
     AbstractBufferedFile objects at IO time. This option is likely to improve
     performance when making small reads from larger ORC files.
-kwargs are passed to the engine
+storage_options : dict, optional, default None
+    Extra options that make sense for a particular storage connection,
+    e.g. host, port, username, password, etc. For HTTP(S) URLs the key-value
+    pairs are forwarded to ``urllib.request.Request`` as header options.
+    For other URLs (e.g. starting with "s3://", and "gcs://") the key-value
+    pairs are forwarded to ``fsspec.open``. Please see ``fsspec`` and
+    ``urllib`` for more details.
+bytes_per_thread : int, default None
+    Determines the number of bytes to be allocated per thread to read the
+    files in parallel. When there is a file of large size, we get slightly
+    better throughput by decomposing it and transferring multiple "blocks"
+    in parallel (using a python thread pool). Default allocation is
+    {bytes_per_thread} bytes.
+    This parameter is functional only when `use_python_file_object=False`.
 
 Returns
 -------
@@ -416,7 +472,8 @@
 --------
 cudf.DataFrame.to_orc
 """.format(
-    remote_data_sources=_docstring_remote_sources
+    remote_data_sources=_docstring_remote_sources,
+    bytes_per_thread=_BYTES_PER_THREAD_DEFAULT,
 )
 doc_read_orc = docfmt_partial(docstring=_docstring_read_orc)
 
@@ -429,8 +486,9 @@
     File path or object where the ORC dataset will be stored.
 compression : {{ 'snappy', 'ZSTD', None }}, default 'snappy'
     Name of the compression to use. Use None for no compression.
-enable_statistics: boolean, default True
-    Enable writing column statistics.
+statistics: str {{ "ROWGROUP", "STRIPE", None }}, default "ROWGROUP"
+    The granularity with which column statistics must
+    be written to the file.
 stripe_size_bytes: integer or None, default None
     Maximum size of each stripe of the output.
     If None, 67108864 (64MB) will be used.
@@ -444,6 +502,21 @@
     A list of column names which should be written as map type in the ORC file.
     Note that this option only affects columns of ListDtype. Names of other
     column types will be ignored.
+storage_options : dict, optional, default None
+    Extra options that make sense for a particular storage connection,
+    e.g. host, port, username, password, etc. For HTTP(S) URLs the key-value
+    pairs are forwarded to ``urllib.request.Request`` as header options.
+    For other URLs (e.g. starting with "s3://", and "gcs://") the key-value
+    pairs are forwarded to ``fsspec.open``. Please see ``fsspec`` and
+    ``urllib`` for more details.
+index : bool, default None
+    If ``True``, include the dataframe's index(es) in the file output.
+    If ``False``, they will not be written to the file.
+    If ``None``, similar to ``True`` the dataframe's index(es) will
+    be saved, however, instead of being saved as values any
+    ``RangeIndex`` will be stored as a range in the metadata so it
+    doesn't require much space and is faster. Other indexes will
+    be included as columns in the file output.
 
 See Also
 --------
@@ -504,10 +577,11 @@
         ``'columns'``, and ``'records'``.
 typ : type of object to recover (series or frame), default 'frame'
     With cudf engine, only frame output is supported.
-dtype : boolean or dict, default True
+dtype : boolean or dict, default None
     If True, infer dtypes for all columns; if False, then don't infer dtypes at all,
     if a dict, provide a mapping from column names to their respective dtype (any missing
     columns will have their dtype inferred). Applies only to the data.
+    For all ``orient`` values except ``'table'``, default is ``True``.
 convert_axes : boolean, default True
 
     .. admonition:: Not GPU-accelerated
@@ -613,6 +687,13 @@
     If `True`, any string values are read literally (and wrapped in an
     additional set of quotes).
     If `False` string values are parsed into Python strings.
+storage_options : dict, optional, default None
+    Extra options that make sense for a particular storage connection,
+    e.g. host, port, username, password, etc. For HTTP(S) URLs the key-value
+    pairs are forwarded to ``urllib.request.Request`` as header options.
+    For other URLs (e.g. starting with "s3://", and "gcs://") the key-value
+    pairs are forwarded to ``fsspec.open``. Please see ``fsspec`` and
+    ``urllib`` for more details.
 
 Returns
 -------
@@ -1043,7 +1124,7 @@
     Extra options that make sense for a particular storage connection,
     e.g. host, port, username, password, etc. For HTTP(S) URLs the key-value
     pairs are forwarded to ``urllib.request.Request`` as header options.
-    For other URLs (e.g. starting with “s3://”, and “gcs://”) the key-value
+    For other URLs (e.g. starting with "s3://", and "gcs://") the key-value
     pairs are forwarded to ``fsspec.open``. Please see ``fsspec`` and
     ``urllib`` for more details.
 bytes_per_thread : int, default None
@@ -1051,7 +1132,7 @@
     files in parallel. When there is a file of large size, we get slightly
     better throughput by decomposing it and transferring multiple "blocks"
     in parallel (using a python thread pool). Default allocation is
-    256_000_000 bytes.
+    {bytes_per_thread} bytes.
     This parameter is functional only when `use_python_file_object=False`.
 Returns
 -------
@@ -1089,7 +1170,8 @@
 --------
 cudf.DataFrame.to_csv
 """.format(
-    remote_data_sources=_docstring_remote_sources
+    remote_data_sources=_docstring_remote_sources,
+    bytes_per_thread=_BYTES_PER_THREAD_DEFAULT,
 )
 doc_read_csv = docfmt_partial(docstring=_docstring_read_csv)
 
@@ -1139,7 +1221,7 @@
     Extra options that make sense for a particular storage connection,
     e.g. host, port, username, password, etc. For HTTP(S) URLs the key-value
     pairs are forwarded to ``urllib.request.Request`` as header options.
-    For other URLs (e.g. starting with “s3://”, and “gcs://”) the key-value
+    For other URLs (e.g. starting with "s3://", and "gcs://") the key-value
     pairs are forwarded to ``fsspec.open``. Please see ``fsspec`` and
     ``urllib`` for more details.
 Returns
@@ -1215,6 +1297,12 @@
 delimiter : string, default None
     The delimiter that should be used for splitting text chunks into
     separate cudf column rows. The delimiter may be one or more characters.
+byte_range : list or tuple, default None
+    Byte range within the input file to be read. The first number is the
+    offset in bytes, the second number is the range size in bytes.
+    The output contains all rows that start inside the byte range
+    (i.e. at or after the offset, and before the end at `offset + size`),
+    which may include rows that continue past the end.
 strip_delimiters : boolean, default False
     Unlike the `str.split()` function, `read_text` preserves the delimiter
     at the end of a field in output by default, meaning `a;b;c` will turn into
@@ -1222,12 +1310,6 @@
     Setting this option to `True` will strip these trailing delimiters,
     leaving only the contents between delimiters in the resulting column:
     `['a','b','c']`
-byte_range : list or tuple, default None
-    Byte range within the input file to be read. The first number is the
-    offset in bytes, the second number is the range size in bytes.
-    The output contains all rows that start inside the byte range
-    (i.e. at or after the offset, and before the end at `offset + size`),
-    which may include rows that continue past the end.
 compression : string, default None
     Which compression type is the input compressed with.
     Currently supports only `bgzip`, and requires the path to a file as input.
@@ -1238,6 +1320,13 @@
     compressed file (upper 48 bits).
     The start offset points to the first byte to be read, the end offset points
     one past the last byte to be read.
+storage_options : dict, optional, default None
+    Extra options that make sense for a particular storage connection,
+    e.g. host, port, username, password, etc. For HTTP(S) URLs the key-value
+    pairs are forwarded to ``urllib.request.Request`` as header options.
+    For other URLs (e.g. starting with "s3://", and "gcs://") the key-value
+    pairs are forwarded to ``fsspec.open``. Please see ``fsspec`` and
+    ``urllib`` for more details.
 
 Returns
 -------
@@ -1247,6 +1336,66 @@
 doc_read_text = docfmt_partial(docstring=_docstring_text_datasource)
 
 
+_docstring_get_reader_filepath_or_buffer = """
+Return either a filepath string to data, or a memory buffer of data.
+If filepath, then the source filepath is expanded to user's environment.
+If buffer, then data is returned in-memory as bytes or a BytesIO object.
+
+Parameters
+----------
+path_or_data : str, file-like object, bytes, BytesIO
+    Path to data or the data itself.
+compression : str
+    Type of compression algorithm for the content
+mode : str
+    Mode in which file is opened
+iotypes : tuple, default (BytesIO, NativeFile)
+    Object types to exclude from the file-like check
+use_python_file_object : boolean, default False
+    If True, Arrow-backed PythonFile objects will be used in place
+    of fsspec AbstractBufferedFile objects.
+open_file_options : dict, optional
+    Optional dictionary of keyword arguments to pass to
+    `_open_remote_files` (used for remote storage only).
+allow_raw_text_input : boolean, default False
+    If True, this indicates the input `path_or_data` could be a raw text
+    input and will not check for its existence in the filesystem. If False,
+    the input must be a path and an error will be raised if it does not
+    exist.
+storage_options : dict, optional
+    Extra options that make sense for a particular storage connection, e.g.
+    host, port, username, password, etc. For HTTP(S) URLs the key-value
+    pairs are forwarded to ``urllib.request.Request`` as header options.
+    For other URLs (e.g. starting with "s3://", and "gcs://") the key-value
+    pairs are forwarded to ``fsspec.open``. Please see ``fsspec`` and
+    ``urllib`` for more details, and for more examples on storage options
+    refer `here <https://pandas.pydata.org/docs/user_guide/io.html?
+    highlight=storage_options#reading-writing-remote-files>`__.
+bytes_per_thread : int, default None
+    Determines the number of bytes to be allocated per thread to read the
+    files in parallel. When there is a file of large size, we get slightly
+    better throughput by decomposing it and transferring multiple "blocks"
+    in parallel (using a Python thread pool). Default allocation is
+    {bytes_per_thread} bytes.
+    This parameter is functional only when `use_python_file_object=False`.
+
+Returns
+-------
+filepath_or_buffer : str, bytes, BytesIO, list
+    Filepath string or in-memory buffer of data or a
+    list of Filepath strings or in-memory buffers of data.
+compression : str
+    Type of compression algorithm for the content
+    """.format(
+    bytes_per_thread=_BYTES_PER_THREAD_DEFAULT
+)
+
+
+doc_get_reader_filepath_or_buffer = docfmt_partial(
+    docstring=_docstring_get_reader_filepath_or_buffer
+)
+
+
 def is_url(url):
     """Check if a string is a valid URL to a network location.
 
@@ -1295,13 +1444,12 @@ def _is_local_filesystem(fs):
     return isinstance(fs, fsspec.implementations.local.LocalFileSystem)
 
 
-def ensure_single_filepath_or_buffer(path_or_data, **kwargs):
+def ensure_single_filepath_or_buffer(path_or_data, storage_options=None):
     """Return False if `path_or_data` resolves to multiple filepaths or
     buffers.
     """
     path_or_data = stringify_pathlike(path_or_data)
     if isinstance(path_or_data, str):
-        storage_options = kwargs.get("storage_options")
         path_or_data = os.path.expanduser(path_or_data)
         try:
             fs, _, paths = get_fs_token_paths(
@@ -1321,11 +1469,10 @@ def ensure_single_filepath_or_buffer(path_or_data, **kwargs):
     return True
 
 
-def is_directory(path_or_data, **kwargs):
+def is_directory(path_or_data, storage_options=None):
     """Returns True if the provided filepath is a directory"""
     path_or_data = stringify_pathlike(path_or_data)
     if isinstance(path_or_data, str):
-        storage_options = kwargs.get("storage_options")
         path_or_data = os.path.expanduser(path_or_data)
         try:
             fs = get_fs_token_paths(
@@ -1342,7 +1489,7 @@ def is_directory(path_or_data, **kwargs):
     return False
 
 
-def _get_filesystem_and_paths(path_or_data, **kwargs):
+def _get_filesystem_and_paths(path_or_data, storage_options):
     # Returns a filesystem object and the filesystem-normalized
     # paths. If `path_or_data` does not correspond to a path or
     # list of paths (or if the protocol is not supported), the
@@ -1355,7 +1502,6 @@ def _get_filesystem_and_paths(path_or_data, **kwargs):
         and isinstance(stringify_pathlike(path_or_data[0]), str)
     ):
         # Ensure we are always working with a list
-        storage_options = kwargs.get("storage_options")
         if isinstance(path_or_data, list):
             path_or_data = [
                 os.path.expanduser(stringify_pathlike(source))
@@ -1472,54 +1618,21 @@ def _open_remote_files(
     ]
 
 
+@doc_get_reader_filepath_or_buffer()
 def get_reader_filepath_or_buffer(
     path_or_data,
     compression,
     mode="rb",
     fs=None,
     iotypes=(BytesIO, NativeFile),
-    byte_ranges=None,
     use_python_file_object=False,
     open_file_options=None,
     allow_raw_text_input=False,
-    **kwargs,
+    storage_options=None,
+    bytes_per_thread=_BYTES_PER_THREAD_DEFAULT,
 ):
-    """Return either a filepath string to data, or a memory buffer of data.
-    If filepath, then the source filepath is expanded to user's environment.
-    If buffer, then data is returned in-memory as bytes or a ByteIO object.
-
-    Parameters
-    ----------
-    path_or_data : str, file-like object, bytes, ByteIO
-        Path to data or the data itself.
-    compression : str
-        Type of compression algorithm for the content
-    mode : str
-        Mode in which file is opened
-    iotypes : (), default (BytesIO)
-        Object type to exclude from file-like check
-    byte_ranges : list, optional
-        List of known byte ranges that will be read from path_or_data
-    use_python_file_object : boolean, default False
-        If True, Arrow-backed PythonFile objects will be used in place
-        of fsspec AbstractBufferedFile objects.
-    open_file_options : dict, optional
-        Optional dictionary of key-word arguments to pass to
-        `_open_remote_files` (used for remote storage only).
-    allow_raw_text_input : boolean, default False
-        If True, this indicates the input `path_or_data` could be a raw text
-        input and will not check for its existence in the filesystem. If False,
-        the input must be a path and an error will be raised if it does not
-        exist.
+    """{docstring}"""
 
-    Returns
-    -------
-    filepath_or_buffer : str, bytes, BytesIO, list
-        Filepath string or in-memory buffer of data or a
-        list of Filepath strings or in-memory buffers of data.
-    compression : str
-        Type of compression algorithm for the content
-    """
     path_or_data = stringify_pathlike(path_or_data)
 
     if isinstance(path_or_data, str):
@@ -1527,7 +1640,9 @@ def get_reader_filepath_or_buffer(
         # Get a filesystem object if one isn't already available
         paths = [path_or_data]
         if fs is None:
-            fs, paths = _get_filesystem_and_paths(path_or_data, **kwargs)
+            fs, paths = _get_filesystem_and_paths(
+                path_or_data, storage_options
+            )
             if fs is None:
                 return path_or_data, compression
 
@@ -1560,7 +1675,7 @@ def get_reader_filepath_or_buffer(
                             fpath,
                             fs=fs,
                             mode=mode,
-                            **kwargs,
+                            bytes_per_thread=bytes_per_thread,
                         )
                     )
                     for fpath in paths
@@ -1575,13 +1690,15 @@ def get_reader_filepath_or_buffer(
             path_or_data = ArrowPythonFile(path_or_data)
         else:
             path_or_data = BytesIO(
-                _fsspec_data_transfer(path_or_data, mode=mode, **kwargs)
+                _fsspec_data_transfer(
+                    path_or_data, mode=mode, bytes_per_thread=bytes_per_thread
+                )
             )
 
     return path_or_data, compression
 
 
-def get_writer_filepath_or_buffer(path_or_data, mode, **kwargs):
+def get_writer_filepath_or_buffer(path_or_data, mode, storage_options=None):
     """
     Return either a filepath string to data,
     or a open file object to the output filesystem
@@ -1592,14 +1709,23 @@ def get_writer_filepath_or_buffer(path_or_data, mode, **kwargs):
         Path to data or the data itself.
     mode : str
         Mode in which file is opened
+    storage_options : dict, optional, default None
+        Extra options that make sense for a particular storage connection,
+        e.g. host, port, username, password, etc. For HTTP(S) URLs the
+        key-value pairs are forwarded to ``urllib.request.Request`` as
+        header options. For other URLs (e.g. starting with "s3://", and
+        "gcs://") the key-value pairs are forwarded to ``fsspec.open``.
+        Please see ``fsspec`` and ``urllib`` for more details.
 
     Returns
     -------
     filepath_or_buffer : str,
         Filepath string or buffer of data
     """
+    if storage_options is None:
+        storage_options = {}
+
     if isinstance(path_or_data, str):
-        storage_options = kwargs.get("storage_options", {})
         path_or_data = os.path.expanduser(path_or_data)
         fs = get_fs_token_paths(
             path_or_data, mode=mode or "w", storage_options=storage_options
@@ -1793,11 +1919,11 @@ def _prepare_filters(filters):
     return filters
 
 
-def _ensure_filesystem(passed_filesystem, path, **kwargs):
+def _ensure_filesystem(passed_filesystem, path, storage_options):
     if passed_filesystem is None:
         return get_fs_token_paths(
             path[0] if isinstance(path, list) else path,
-            storage_options=kwargs.get("storage_options", {}),
+            storage_options={} if storage_options is None else storage_options,
         )[0]
     return passed_filesystem
 
@@ -1811,11 +1937,12 @@ def _fsspec_data_transfer(
     path_or_fob,
     fs=None,
     file_size=None,
-    bytes_per_thread=256_000_000,
+    bytes_per_thread=_BYTES_PER_THREAD_DEFAULT,
     max_gap=64_000,
     mode="rb",
-    **kwargs,
 ):
+    if bytes_per_thread is None:
+        bytes_per_thread = _BYTES_PER_THREAD_DEFAULT
 
     # Require `fs` if `path_or_fob` is not file-like
     file_like = is_file_like(path_or_fob)
@@ -1848,7 +1975,6 @@ def _fsspec_data_transfer(
         byte_ranges,
         buf,
         fs=fs,
-        **kwargs,
     )
 
     return buf.tobytes()
@@ -1898,7 +2024,6 @@ def _read_byte_ranges(
     ranges,
     local_buffer,
     fs=None,
-    **kwargs,
 ):
     # Simple utility to copy remote byte ranges
     # into a local buffer for IO in libcudf
diff --git a/python/dask_cudf/dask_cudf/io/parquet.py b/python/dask_cudf/dask_cudf/io/parquet.py
index e64847948cf..bd398cb9607 100644
--- a/python/dask_cudf/dask_cudf/io/parquet.py
+++ b/python/dask_cudf/dask_cudf/io/parquet.py
@@ -22,7 +22,11 @@
 from cudf.io import write_to_dataset
 from cudf.io.parquet import _default_open_file_options
 from cudf.utils.dtypes import cudf_dtype_from_pa_type
-from cudf.utils.ioutils import _is_local_filesystem, _open_remote_files
+from cudf.utils.ioutils import (
+    _ROW_GROUP_SIZE_BYTES_DEFAULT,
+    _is_local_filesystem,
+    _open_remote_files,
+)
 
 
 class CudfEngine(ArrowDatasetEngine):
@@ -292,24 +296,47 @@ def write_partition(
             preserve_index = True
         if partition_on:
             md = write_to_dataset(
-                df,
-                path,
+                df=df,
+                root_path=path,
+                compression=compression,
                 filename=filename,
                 partition_cols=partition_on,
                 fs=fs,
                 preserve_index=preserve_index,
                 return_metadata=return_metadata,
-                **kwargs,
+                statistics=kwargs.get("statistics", "ROWGROUP"),
+                int96_timestamps=kwargs.get("int96_timestamps", False),
+                row_group_size_bytes=kwargs.get(
+                    "row_group_size_bytes", _ROW_GROUP_SIZE_BYTES_DEFAULT
+                ),
+                row_group_size_rows=kwargs.get("row_group_size_rows", None),
+                max_page_size_bytes=kwargs.get("max_page_size_bytes", None),
+                max_page_size_rows=kwargs.get("max_page_size_rows", None),
+                storage_options=kwargs.get("storage_options", None),
             )
         else:
             with fs.open(fs.sep.join([path, filename]), mode="wb") as out_file:
                 if not isinstance(out_file, IOBase):
                     out_file = BufferedWriter(out_file)
                 md = df.to_parquet(
-                    out_file,
-                    compression=compression,
+                    path=out_file,
+                    engine=kwargs.get("engine", "cudf"),
+                    index=kwargs.get("index", None),
+                    partition_cols=kwargs.get("partition_cols", None),
+                    partition_file_name=kwargs.get(
+                        "partition_file_name", None
+                    ),
+                    partition_offsets=kwargs.get("partition_offsets", None),
+                    statistics=kwargs.get("statistics", "ROWGROUP"),
+                    int96_timestamps=kwargs.get("int96_timestamps", False),
+                    row_group_size_bytes=kwargs.get(
+                        "row_group_size_bytes", _ROW_GROUP_SIZE_BYTES_DEFAULT
+                    ),
+                    row_group_size_rows=kwargs.get(
+                        "row_group_size_rows", None
+                    ),
+                    storage_options=kwargs.get("storage_options", None),
                     metadata_file_path=filename if return_metadata else None,
-                    **kwargs,
                 )
         # Return the schema needed to write the metadata
         if return_metadata:

From 41fca6ec0c36f7d1dbc0b0f36ffde271e9118961 Mon Sep 17 00:00:00 2001
From: Vukasin Milovanovic <vmilovanovic@nvidia.com>
Date: Tue, 1 Nov 2022 10:34:52 -0700
Subject: [PATCH 103/202] Add `read_orc_metadata` to libcudf (#11815)

Issue https://github.com/rapidsai/cudf/issues/11675

Adds a C++ interface to get information about an ORC file. It is meant to be an efficient way to get information like column names and types, as well as file structure (e.g. number of stripes). The returned structure can be expanded to include more types of metadata; for now, it only returns info that we found relevant internally.

The returned column hierarchy matches the one used in ORC (i.e. root struct column included), not the hierarchy of a cuDF dataframe that the file would be read as (root column children become top level cuDF columns).

This PR also includes improvements to ORC reader benchmarks, enabled by the new metadata API.

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - Mike Wilson (https://github.com/hyperbolic2346)
  - AJ Schmidt (https://github.com/ajschmidt8)
  - https://github.com/nvdbaranec

URL: https://github.com/rapidsai/cudf/pull/11815
---
 conda/recipes/libcudf/meta.yaml               |   1 +
 cpp/benchmarks/io/csv/csv_writer.cpp          |   4 +-
 cpp/benchmarks/io/orc/orc_reader_input.cpp    |   2 +
 cpp/benchmarks/io/orc/orc_reader_options.cpp  |  47 +++--
 cpp/benchmarks/io/orc/orc_writer.cpp          |   2 +
 cpp/benchmarks/io/orc/orc_writer_chunks.cpp   |   4 +-
 .../io/parquet/parquet_reader_input.cpp       |   2 +
 .../io/parquet/parquet_reader_options.cpp     |   2 +
 cpp/benchmarks/io/parquet/parquet_writer.cpp  |   4 +-
 .../io/parquet/parquet_writer_chunks.cpp      |   4 +-
 cpp/include/cudf/io/orc_metadata.hpp          | 164 +++++++++++++++++-
 .../cudf/io/orc_types.hpp}                    |  10 +-
 cpp/src/io/functions.cpp                      |  36 ++++
 cpp/src/io/orc/dict_enc.cu                    |   2 +-
 cpp/src/io/orc/orc.hpp                        |   6 +-
 cpp/src/io/orc/orc_gpu.hpp                    |   2 +-
 cpp/src/io/orc/stats_enc.cu                   |   2 +-
 cpp/src/io/orc/stripe_data.cu                 |   9 +-
 cpp/src/io/orc/stripe_enc.cu                  |   2 +-
 cpp/src/io/orc/stripe_init.cu                 |   2 +-
 cpp/tests/io/orc_test.cpp                     | 105 +++++++++++
 21 files changed, 370 insertions(+), 42 deletions(-)
 rename cpp/{src/io/orc/orc_common.hpp => include/cudf/io/orc_types.hpp} (94%)

diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml
index 739c5409ca4..0687e76a356 100644
--- a/conda/recipes/libcudf/meta.yaml
+++ b/conda/recipes/libcudf/meta.yaml
@@ -149,6 +149,7 @@ outputs:
         - test -f $PREFIX/include/cudf/io/json.hpp
         - test -f $PREFIX/include/cudf/io/orc.hpp
         - test -f $PREFIX/include/cudf/io/orc_metadata.hpp
+        - test -f $PREFIX/include/cudf/io/orc_types.hpp
         - test -f $PREFIX/include/cudf/io/parquet.hpp
         - test -f $PREFIX/include/cudf/io/text/byte_range_info.hpp
         - test -f $PREFIX/include/cudf/io/text/data_chunk_source.hpp
diff --git a/cpp/benchmarks/io/csv/csv_writer.cpp b/cpp/benchmarks/io/csv/csv_writer.cpp
index 5d61d81bb71..54a86094eb7 100644
--- a/cpp/benchmarks/io/csv/csv_writer.cpp
+++ b/cpp/benchmarks/io/csv/csv_writer.cpp
@@ -21,8 +21,8 @@
 
 #include <cudf/io/csv.hpp>
 
-// to enable, run cmake with -DBUILD_BENCHMARKS=ON
-
+// Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to
+// run on most GPUs, but large enough to allow highest throughput
 constexpr size_t data_size         = 256 << 20;
 constexpr cudf::size_type num_cols = 64;
 
diff --git a/cpp/benchmarks/io/orc/orc_reader_input.cpp b/cpp/benchmarks/io/orc/orc_reader_input.cpp
index 8c6f9f32f61..f1aaf506a60 100644
--- a/cpp/benchmarks/io/orc/orc_reader_input.cpp
+++ b/cpp/benchmarks/io/orc/orc_reader_input.cpp
@@ -25,6 +25,8 @@
 
 #include <nvbench/nvbench.cuh>
 
+// Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to
+// run on most GPUs, but large enough to allow highest throughput
 constexpr int64_t data_size        = 512 << 20;
 constexpr cudf::size_type num_cols = 64;
 
diff --git a/cpp/benchmarks/io/orc/orc_reader_options.cpp b/cpp/benchmarks/io/orc/orc_reader_options.cpp
index 6ca7a494642..1b7d33ccd19 100644
--- a/cpp/benchmarks/io/orc/orc_reader_options.cpp
+++ b/cpp/benchmarks/io/orc/orc_reader_options.cpp
@@ -21,17 +21,27 @@
 #include <benchmarks/io/nvbench_helpers.hpp>
 
 #include <cudf/io/orc.hpp>
+#include <cudf/io/orc_metadata.hpp>
 #include <cudf/utilities/default_stream.hpp>
 
 #include <nvbench/nvbench.cuh>
 
+// Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to
+// run on most GPUs, but large enough to allow highest throughput
 constexpr int64_t data_size = 512 << 20;
+// The number of separate read calls to use when reading files in multiple chunks
+// Each call reads roughly equal amounts of data
+constexpr int32_t chunked_read_num_chunks = 8;
 
 std::vector<std::string> get_col_names(cudf::io::source_info const& source)
 {
-  cudf::io::orc_reader_options const read_options =
-    cudf::io::orc_reader_options::builder(source).num_rows(1);
-  return cudf::io::read_orc(read_options).metadata.column_names;
+  auto const top_lvl_cols = cudf::io::read_orc_metadata(source).schema().root().children();
+  std::vector<std::string> col_names;
+  std::transform(top_lvl_cols.cbegin(),
+                 top_lvl_cols.cend(),
+                 std::back_inserter(col_names),
+                 [](auto const& col_meta) { return col_meta.name(); });
+  return col_names;
 }
 
 template <column_selection ColSelection,
@@ -48,7 +58,7 @@ void BM_orc_read_varying_options(nvbench::state& state,
 {
   cudf::rmm_pool_raii rmm_pool;
 
-  auto constexpr num_chunks = 1;
+  auto const num_chunks = RowSelection == row_selection::ALL ? 1 : chunked_read_num_chunks;
 
   auto const use_index     = UsesIndex == uses_index::YES;
   auto const use_np_dtypes = UsesNumpyDType == uses_numpy_dtype::YES;
@@ -79,7 +89,8 @@ void BM_orc_read_varying_options(nvbench::state& state,
       .use_np_dtypes(use_np_dtypes)
       .timestamp_type(ts_type);
 
-  auto const num_stripes              = data_size / (64 << 20);
+  auto const num_stripes =
+    cudf::io::read_orc_metadata(source_sink.make_source_info()).num_stripes();
   cudf::size_type const chunk_row_cnt = view.num_rows() / num_chunks;
 
   auto mem_stats_logger = cudf::memory_stats_logger();
@@ -94,14 +105,9 @@ void BM_orc_read_varying_options(nvbench::state& state,
         auto const is_last_chunk = chunk == (num_chunks - 1);
         switch (RowSelection) {
           case row_selection::ALL: break;
-          case row_selection::STRIPES: {
-            auto stripes_to_read = segments_in_chunk(num_stripes, num_chunks, chunk);
-            if (is_last_chunk) {
-              // Need to assume that an additional "overflow" stripe is present
-              stripes_to_read.push_back(num_stripes);
-            }
-            read_options.set_stripes({stripes_to_read});
-          } break;
+          case row_selection::STRIPES:
+            read_options.set_stripes({segments_in_chunk(num_stripes, num_chunks, chunk)});
+            break;
           case row_selection::NROWS:
             read_options.set_skip_rows(chunk * chunk_row_cnt);
             read_options.set_num_rows(chunk_row_cnt);
@@ -129,6 +135,8 @@ using col_selections = nvbench::enum_type_list<column_selection::ALL,
                                                column_selection::ALTERNATE,
                                                column_selection::FIRST_HALF,
                                                column_selection::SECOND_HALF>;
+using row_selections =
+  nvbench::enum_type_list<row_selection::ALL, row_selection::STRIPES, row_selection::NROWS>;
 
 NVBENCH_BENCH_TYPES(BM_orc_read_varying_options,
                     NVBENCH_TYPE_AXES(col_selections,
@@ -141,11 +149,22 @@ NVBENCH_BENCH_TYPES(BM_orc_read_varying_options,
     {"column_selection", "row_selection", "uses_index", "uses_numpy_dtype", "timestamp_type"})
   .set_min_samples(4);
 
+NVBENCH_BENCH_TYPES(BM_orc_read_varying_options,
+                    NVBENCH_TYPE_AXES(nvbench::enum_type_list<column_selection::ALL>,
+                                      row_selections,
+                                      nvbench::enum_type_list<uses_index::YES>,
+                                      nvbench::enum_type_list<uses_numpy_dtype::YES>,
+                                      nvbench::enum_type_list<cudf::type_id::EMPTY>))
+  .set_name("orc_read_row_selection")
+  .set_type_axes_names(
+    {"column_selection", "row_selection", "uses_index", "uses_numpy_dtype", "timestamp_type"})
+  .set_min_samples(4);
+
 NVBENCH_BENCH_TYPES(
   BM_orc_read_varying_options,
   NVBENCH_TYPE_AXES(
     nvbench::enum_type_list<column_selection::ALL>,
-    nvbench::enum_type_list<row_selection::NROWS>,
+    nvbench::enum_type_list<row_selection::ALL>,
     nvbench::enum_type_list<uses_index::YES, uses_index::NO>,
     nvbench::enum_type_list<uses_numpy_dtype::YES, uses_numpy_dtype::NO>,
     nvbench::enum_type_list<cudf::type_id::EMPTY, cudf::type_id::TIMESTAMP_NANOSECONDS>))
diff --git a/cpp/benchmarks/io/orc/orc_writer.cpp b/cpp/benchmarks/io/orc/orc_writer.cpp
index 21d903d42ae..545f8d10122 100644
--- a/cpp/benchmarks/io/orc/orc_writer.cpp
+++ b/cpp/benchmarks/io/orc/orc_writer.cpp
@@ -38,6 +38,8 @@ NVBENCH_DECLARE_ENUM_TYPE_STRINGS(
   },
   [](auto) { return std::string{}; })
 
+// Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to
+// run on most GPUs, but large enough to allow highest throughput
 constexpr int64_t data_size        = 512 << 20;
 constexpr cudf::size_type num_cols = 64;
 
diff --git a/cpp/benchmarks/io/orc/orc_writer_chunks.cpp b/cpp/benchmarks/io/orc/orc_writer_chunks.cpp
index 494b0d0d98e..592eae96362 100644
--- a/cpp/benchmarks/io/orc/orc_writer_chunks.cpp
+++ b/cpp/benchmarks/io/orc/orc_writer_chunks.cpp
@@ -29,8 +29,8 @@
 
 #include <nvbench/nvbench.cuh>
 
-// to enable, run cmake with -DBUILD_BENCHMARKS=ON
-
+// Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to
+// run on most GPUs, but large enough to allow highest throughput
 constexpr int64_t data_size = 512 << 20;
 
 void nvbench_orc_write(nvbench::state& state)
diff --git a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp
index 761cbeb62f8..7a4e649d4fb 100644
--- a/cpp/benchmarks/io/parquet/parquet_reader_input.cpp
+++ b/cpp/benchmarks/io/parquet/parquet_reader_input.cpp
@@ -25,6 +25,8 @@
 
 #include <nvbench/nvbench.cuh>
 
+// Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to
+// run on most GPUs, but large enough to allow highest throughput
 constexpr size_t data_size         = 512 << 20;
 constexpr cudf::size_type num_cols = 64;
 
diff --git a/cpp/benchmarks/io/parquet/parquet_reader_options.cpp b/cpp/benchmarks/io/parquet/parquet_reader_options.cpp
index 52121859f13..b5e4f6d8f2b 100644
--- a/cpp/benchmarks/io/parquet/parquet_reader_options.cpp
+++ b/cpp/benchmarks/io/parquet/parquet_reader_options.cpp
@@ -25,6 +25,8 @@
 
 #include <nvbench/nvbench.cuh>
 
+// Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to
+// run on most GPUs, but large enough to allow highest throughput
 constexpr std::size_t data_size      = 512 << 20;
 constexpr std::size_t row_group_size = 128 << 20;
 
diff --git a/cpp/benchmarks/io/parquet/parquet_writer.cpp b/cpp/benchmarks/io/parquet/parquet_writer.cpp
index 1cb83e5b4c8..753ffbd00c9 100644
--- a/cpp/benchmarks/io/parquet/parquet_writer.cpp
+++ b/cpp/benchmarks/io/parquet/parquet_writer.cpp
@@ -25,8 +25,6 @@
 
 #include <nvbench/nvbench.cuh>
 
-// to enable, run cmake with -DBUILD_BENCHMARKS=ON
-
 NVBENCH_DECLARE_ENUM_TYPE_STRINGS(
   cudf::io::statistics_freq,
   [](auto value) {
@@ -39,6 +37,8 @@ NVBENCH_DECLARE_ENUM_TYPE_STRINGS(
   },
   [](auto) { return std::string{}; })
 
+// Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to
+// run on most GPUs, but large enough to allow highest throughput
 constexpr size_t data_size         = 512 << 20;
 constexpr cudf::size_type num_cols = 64;
 
diff --git a/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp b/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp
index e563055194e..11b29cc2297 100644
--- a/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp
+++ b/cpp/benchmarks/io/parquet/parquet_writer_chunks.cpp
@@ -27,8 +27,8 @@
 
 #include <nvbench/nvbench.cuh>
 
-// to enable, run cmake with -DBUILD_BENCHMARKS=ON
-
+// Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to
+// run on most GPUs, but large enough to allow highest throughput
 constexpr int64_t data_size = 512 << 20;
 
 void PQ_write(nvbench::state& state)
diff --git a/cpp/include/cudf/io/orc_metadata.hpp b/cpp/include/cudf/io/orc_metadata.hpp
index d974eaa103a..6ef7ea49c59 100644
--- a/cpp/include/cudf/io/orc_metadata.hpp
+++ b/cpp/include/cudf/io/orc_metadata.hpp
@@ -21,6 +21,7 @@
 
 #pragma once
 
+#include <cudf/io/orc_types.hpp>
 #include <cudf/io/types.hpp>
 
 #include <optional>
@@ -180,7 +181,7 @@ struct column_statistics {
    *
    * @param detail_statistics The statistics to initialize the object with
    */
-  column_statistics(cudf::io::orc::column_statistics&& detail_statistics);
+  column_statistics(orc::column_statistics&& detail_statistics);
 };
 
 /**
@@ -207,5 +208,166 @@ struct parsed_orc_statistics {
  */
 parsed_orc_statistics read_parsed_orc_statistics(source_info const& src_info);
 
+/**
+ * @brief Schema of an ORC column, including the nested columns.
+ */
+struct orc_column_schema {
+ public:
+  /**
+   * @brief constructor
+   *
+   * @param name column name
+   * @param type ORC type
+   * @param children child columns (empty for non-nested types)
+   */
+  orc_column_schema(std::string_view name,
+                    orc::TypeKind type,
+                    std::vector<orc_column_schema> children)
+    : _name{name}, _type_kind{type}, _children{std::move(children)}
+  {
+  }
+
+  /**
+   * @brief Returns ORC column name; can be empty.
+   *
+   * @return Column name
+   */
+  [[nodiscard]] auto name() const { return _name; }
+
+  /**
+   * @brief Returns ORC type of the column.
+   *
+   * @return Column ORC type
+   */
+  [[nodiscard]] auto type_kind() const { return _type_kind; }
+
+  /**
+   * @brief Returns schemas of all child columns.
+   *
+   * @return Children schemas
+   */
+  [[nodiscard]] auto const& children() const& { return _children; }
+
+  /** @copydoc children
+   * Children array is moved out of the object (rvalues only).
+   *
+   */
+  [[nodiscard]] auto children() && { return std::move(_children); }
+
+  /**
+   * @brief Returns schema of the child with the given index.
+   *
+   * @param idx child index
+   *
+   * @return Child schema
+   */
+  [[nodiscard]] auto const& child(int idx) const& { return children().at(idx); }
+
+  /** @copydoc child
+   * Child is moved out of the object (rvalues only).
+   *
+   */
+  [[nodiscard]] auto child(int idx) && { return std::move(children().at(idx)); }
+
+  /**
+   * @brief Returns the number of child columns.
+   *
+   * @return Children count
+   */
+  [[nodiscard]] auto num_children() const { return children().size(); }
+
+ private:
+  std::string _name;
+  orc::TypeKind _type_kind;
+  std::vector<orc_column_schema> _children;
+};
+
+/**
+ * @brief Schema of an ORC file.
+ */
+struct orc_schema {
+ public:
+  /**
+   * @brief constructor
+   *
+   * @param root_column_schema root column
+   */
+  orc_schema(orc_column_schema root_column_schema) : _root{std::move(root_column_schema)} {}
+
+  /**
+   * @brief Returns the schema of the struct column that contains all columns as fields.
+   *
+   * @return Root column schema
+   */
+  [[nodiscard]] auto const& root() const& { return _root; }
+
+  /** @copydoc root
+   * Root column schema is moved out of the object (rvalues only).
+   *
+   */
+  [[nodiscard]] auto root() && { return std::move(_root); }
+
+ private:
+  orc_column_schema _root;
+};
+
+/**
+ * @brief Information about content of an ORC file.
+ */
+class orc_metadata {
+ public:
+  /**
+   * @brief constructor
+   *
+   * @param schema ORC schema
+   * @param num_rows number of rows
+   * @param num_stripes number of stripes
+   */
+  orc_metadata(orc_schema schema, size_type num_rows, size_type num_stripes)
+    : _schema{std::move(schema)}, _num_rows{num_rows}, _num_stripes{num_stripes}
+  {
+  }
+
+  /**
+   * @brief Returns the ORC schema.
+   *
+   * @return ORC schema
+   */
+  [[nodiscard]] auto const& schema() const { return _schema; }
+
+  // Number of rows in the root column; can vary for nested columns
+  /**
+   * @brief Returns the number of rows of the root column.
+   *
+   * If a file contains list columns, nested columns can have a different number of rows.
+   *
+   * @return Number of rows
+   */
+  [[nodiscard]] auto num_rows() const { return _num_rows; }
+
+  /**
+   * @brief Returns the number of stripes in the file.
+   *
+   * @return Number of stripes
+   */
+  [[nodiscard]] auto num_stripes() const { return _num_stripes; }
+
+ private:
+  orc_schema _schema;
+  size_type _num_rows;
+  size_type _num_stripes;
+};
+
+/**
+ * @brief Reads metadata of ORC dataset.
+ *
+ * @ingroup io_readers
+ *
+ * @param src_info Dataset source
+ *
+ * @return orc_metadata with ORC schema, number of rows, and number of stripes
+ */
+orc_metadata read_orc_metadata(source_info const& src_info);
+
 }  // namespace io
 }  // namespace cudf
diff --git a/cpp/src/io/orc/orc_common.hpp b/cpp/include/cudf/io/orc_types.hpp
similarity index 94%
rename from cpp/src/io/orc/orc_common.hpp
rename to cpp/include/cudf/io/orc_types.hpp
index c2898b362a6..09cae2ef06c 100644
--- a/cpp/src/io/orc/orc_common.hpp
+++ b/cpp/include/cudf/io/orc_types.hpp
@@ -18,11 +18,7 @@
 
 #include <cstdint>
 
-namespace cudf {
-namespace io {
-namespace orc {
-
-static constexpr uint32_t block_header_size = 3;
+namespace cudf::io::orc {
 
 enum CompressionKind : uint8_t {
   NONE   = 0,
@@ -87,6 +83,4 @@ enum ProtofType : uint8_t {
   INVALID_7   = 7,
 };
 
-}  // namespace orc
-}  // namespace io
-}  // namespace cudf
+}  // namespace cudf::io::orc
diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp
index 94a191147c2..c244a30dc75 100644
--- a/cpp/src/io/functions.cpp
+++ b/cpp/src/io/functions.cpp
@@ -34,6 +34,8 @@
 #include <cudf/utilities/error.hpp>
 #include <io/orc/orc.hpp>
 
+#include <cudf/detail/iterator.cuh>
+
 namespace cudf {
 namespace io {
 // Returns builder for csv_reader_options
@@ -337,6 +339,40 @@ parsed_orc_statistics read_parsed_orc_statistics(source_info const& src_info)
 
   return result;
 }
+namespace {
+orc_column_schema make_orc_column_schema(host_span<orc::SchemaType const> orc_schema,
+                                         uint32_t column_id,
+                                         std::string column_name)
+{
+  auto const& orc_col_schema = orc_schema[column_id];
+  std::vector<orc_column_schema> children;
+  children.reserve(orc_col_schema.subtypes.size());
+  std::transform(
+    orc_col_schema.subtypes.cbegin(),
+    orc_col_schema.subtypes.cend(),
+    cudf::detail::make_counting_transform_iterator(0,
+                                                   [&names = orc_col_schema.fieldNames](size_t i) {
+                                                     return i < names.size() ? names[i]
+                                                                             : std::string{};
+                                                   }),
+    std::back_inserter(children),
+    [&](auto& type, auto name) { return make_orc_column_schema(orc_schema, type, name); });
+
+  return {std::move(column_name), orc_schema[column_id].kind, std::move(children)};
+}
+};  // namespace
+
+orc_metadata read_orc_metadata(source_info const& src_info)
+{
+  auto sources = make_datasources(src_info);
+
+  CUDF_EXPECTS(sources.size() == 1, "Only a single source is currently supported.");
+  auto const footer = orc::metadata(sources.front().get(), cudf::detail::default_stream_value).ff;
+
+  return {{make_orc_column_schema(footer.types, 0, "")},
+          static_cast<size_type>(footer.numberOfRows),
+          static_cast<size_type>(footer.stripes.size())};
+}
 
 /**
  * @copydoc cudf::io::read_orc
diff --git a/cpp/src/io/orc/dict_enc.cu b/cpp/src/io/orc/dict_enc.cu
index 0b5de26adfc..898df3ef0f9 100644
--- a/cpp/src/io/orc/dict_enc.cu
+++ b/cpp/src/io/orc/dict_enc.cu
@@ -14,9 +14,9 @@
  * limitations under the License.
  */
 
-#include "orc_common.hpp"
 #include "orc_gpu.hpp"
 
+#include <cudf/io/orc_types.hpp>
 #include <cudf/table/table_device_view.cuh>
 #include <io/utilities/block_utils.cuh>
 
diff --git a/cpp/src/io/orc/orc.hpp b/cpp/src/io/orc/orc.hpp
index 2018024f566..44882b71925 100644
--- a/cpp/src/io/orc/orc.hpp
+++ b/cpp/src/io/orc/orc.hpp
@@ -16,11 +16,10 @@
 
 #pragma once
 
-#include "orc_common.hpp"
-
 #include <cudf/column/column_device_view.cuh>
 #include <cudf/io/datasource.hpp>
 #include <cudf/io/orc_metadata.hpp>
+#include <cudf/io/orc_types.hpp>
 #include <cudf/utilities/error.hpp>
 #include <io/comp/io_uncomp.hpp>
 
@@ -37,6 +36,9 @@
 namespace cudf {
 namespace io {
 namespace orc {
+
+static constexpr uint32_t block_header_size = 3;
+
 struct PostScript {
   uint64_t footerLength       = 0;     // the length of the footer section in bytes
   CompressionKind compression = NONE;  // the kind of generic compression used
diff --git a/cpp/src/io/orc/orc_gpu.hpp b/cpp/src/io/orc/orc_gpu.hpp
index c7a7a423cf2..1e4e36ee91c 100644
--- a/cpp/src/io/orc/orc_gpu.hpp
+++ b/cpp/src/io/orc/orc_gpu.hpp
@@ -19,8 +19,8 @@
 #include "timezone.cuh"
 
 #include "orc.hpp"
-#include "orc_common.hpp"
 
+#include <cudf/io/orc_types.hpp>
 #include <cudf/table/table_device_view.cuh>
 #include <cudf/types.hpp>
 #include <cudf/utilities/span.hpp>
diff --git a/cpp/src/io/orc/stats_enc.cu b/cpp/src/io/orc/stats_enc.cu
index bbff689082e..1303dd126ef 100644
--- a/cpp/src/io/orc/stats_enc.cu
+++ b/cpp/src/io/orc/stats_enc.cu
@@ -14,9 +14,9 @@
  * limitations under the License.
  */
 
-#include "orc_common.hpp"
 #include "orc_gpu.hpp"
 
+#include <cudf/io/orc_types.hpp>
 #include <io/utilities/block_utils.cuh>
 
 #include <rmm/cuda_stream_view.hpp>
diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu
index c9cc0f04b3c..bf883986c84 100644
--- a/cpp/src/io/orc/stripe_data.cu
+++ b/cpp/src/io/orc/stripe_data.cu
@@ -14,12 +14,13 @@
  * limitations under the License.
  */
 
-#include <cub/cub.cuh>
+#include "orc_gpu.hpp"
+
+#include <cudf/io/orc_types.hpp>
 #include <io/utilities/block_utils.cuh>
-#include <rmm/cuda_stream_view.hpp>
 
-#include "orc_common.hpp"
-#include "orc_gpu.hpp"
+#include <cub/cub.cuh>
+#include <rmm/cuda_stream_view.hpp>
 
 namespace cudf {
 namespace io {
diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu
index ef4bdd421fb..109030ef160 100644
--- a/cpp/src/io/orc/stripe_enc.cu
+++ b/cpp/src/io/orc/stripe_enc.cu
@@ -14,9 +14,9 @@
  * limitations under the License.
  */
 
-#include "orc_common.hpp"
 #include "orc_gpu.hpp"
 
+#include <cudf/io/orc_types.hpp>
 #include <io/comp/nvcomp_adapter.hpp>
 #include <io/utilities/block_utils.cuh>
 #include <io/utilities/config_utils.hpp>
diff --git a/cpp/src/io/orc/stripe_init.cu b/cpp/src/io/orc/stripe_init.cu
index bd65089810e..381a734021c 100644
--- a/cpp/src/io/orc/stripe_init.cu
+++ b/cpp/src/io/orc/stripe_init.cu
@@ -14,9 +14,9 @@
  * limitations under the License.
  */
 
-#include "orc_common.hpp"
 #include "orc_gpu.hpp"
 
+#include <cudf/io/orc_types.hpp>
 #include <io/utilities/block_utils.cuh>
 
 #include <cub/cub.cuh>
diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp
index 2f761eeac66..77496fe5c4b 100644
--- a/cpp/tests/io/orc_test.cpp
+++ b/cpp/tests/io/orc_test.cpp
@@ -143,6 +143,10 @@ struct OrcReaderTest : public cudf::test::BaseFixture {
 struct OrcStatisticsTest : public cudf::test::BaseFixture {
 };
 
+// Test fixture for metadata tests
+struct OrcMetadataReaderTest : public cudf::test::BaseFixture {
+};
+
 namespace {
 // Generates a vector of uniform random values of type T
 template <typename T>
@@ -1590,4 +1594,105 @@ TEST_F(OrcReaderTest, EmptyColumnsParam)
   EXPECT_EQ(result.tbl->num_rows(), 0);
 }
 
+TEST_F(OrcMetadataReaderTest, TestBasic)
+{
+  auto const num_rows = 1'200'000;
+
+  auto ints   = random_values<int>(num_rows);
+  auto floats = random_values<float>(num_rows);
+  int32_col int_col(ints.begin(), ints.end());
+  float32_col float_col(floats.begin(), floats.end());
+
+  table_view expected({int_col, float_col});
+
+  cudf::io::table_input_metadata expected_metadata(expected);
+  expected_metadata.column_metadata[0].set_name("int_col");
+  expected_metadata.column_metadata[1].set_name("float_col");
+
+  auto filepath = temp_env->get_temp_filepath("MetadataTest.orc");
+  cudf::io::orc_writer_options out_opts =
+    cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected)
+      .metadata(&expected_metadata);
+  cudf::io::write_orc(out_opts);
+
+  auto meta = read_orc_metadata(cudf::io::source_info{filepath});
+  EXPECT_EQ(meta.num_rows(), num_rows);
+
+  EXPECT_EQ(meta.schema().root().name(), "");
+  EXPECT_EQ(meta.schema().root().type_kind(), cudf::io::orc::STRUCT);
+  ASSERT_EQ(meta.schema().root().num_children(), 2);
+
+  EXPECT_EQ(meta.schema().root().child(0).name(), "int_col");
+  EXPECT_EQ(meta.schema().root().child(1).name(), "float_col");
+}
+
+TEST_F(OrcMetadataReaderTest, TestNested)
+{
+  auto const num_rows       = 1'200'000;
+  auto const lists_per_row  = 4;
+  auto const num_child_rows = num_rows * lists_per_row;
+
+  auto keys = random_values<int>(num_child_rows);
+  auto vals = random_values<float>(num_child_rows);
+  int32_col keys_col(keys.begin(), keys.end());
+  float32_col vals_col(vals.begin(), vals.end());
+  auto s_col = struct_col({keys_col, vals_col}).release();
+
+  std::vector<int> row_offsets(num_rows + 1);
+  for (int idx = 0; idx < num_rows + 1; ++idx) {
+    row_offsets[idx] = idx * lists_per_row;
+  }
+  int32_col offsets(row_offsets.begin(), row_offsets.end());
+
+  auto list_col =
+    cudf::make_lists_column(num_rows, offsets.release(), std::move(s_col), 0, rmm::device_buffer{});
+
+  table_view expected({*list_col, *list_col});
+
+  cudf::io::table_input_metadata expected_metadata(expected);
+  expected_metadata.column_metadata[0].set_name("maps");
+  expected_metadata.column_metadata[0].set_list_column_as_map();
+  expected_metadata.column_metadata[1].set_name("lists");
+  expected_metadata.column_metadata[1].child(1).child(0).set_name("int_field");
+  expected_metadata.column_metadata[1].child(1).child(1).set_name("float_field");
+
+  auto filepath = temp_env->get_temp_filepath("MetadataTest.orc");
+  cudf::io::orc_writer_options out_opts =
+    cudf::io::orc_writer_options::builder(cudf::io::sink_info{filepath}, expected)
+      .metadata(&expected_metadata);
+  cudf::io::write_orc(out_opts);
+
+  auto meta = read_orc_metadata(cudf::io::source_info{filepath});
+  EXPECT_EQ(meta.num_rows(), num_rows);
+
+  EXPECT_EQ(meta.schema().root().name(), "");
+  EXPECT_EQ(meta.schema().root().type_kind(), cudf::io::orc::STRUCT);
+  ASSERT_EQ(meta.schema().root().num_children(), 2);
+
+  auto const& out_map_col = meta.schema().root().child(0);
+  EXPECT_EQ(out_map_col.name(), "maps");
+  EXPECT_EQ(out_map_col.type_kind(), cudf::io::orc::MAP);
+  ASSERT_EQ(out_map_col.num_children(), 2);
+  EXPECT_EQ(out_map_col.child(0).name(), "");  // keys (no name in ORC)
+  EXPECT_EQ(out_map_col.child(1).name(), "");  // values (no name in ORC)
+
+  auto const& out_list_col = meta.schema().root().child(1);
+  EXPECT_EQ(out_list_col.name(), "lists");
+  EXPECT_EQ(out_list_col.type_kind(), cudf::io::orc::LIST);
+  ASSERT_EQ(out_list_col.num_children(), 1);
+
+  auto const& out_list_struct_col = out_list_col.child(0);
+  EXPECT_EQ(out_list_struct_col.name(), "");  // elements (no name in ORC)
+  EXPECT_EQ(out_list_struct_col.type_kind(), cudf::io::orc::STRUCT);
+  ASSERT_EQ(out_list_struct_col.num_children(), 2);
+
+  auto const& out_int_col = out_list_struct_col.child(0);
+  EXPECT_EQ(out_int_col.name(), "int_field");
+  EXPECT_EQ(out_int_col.type_kind(), cudf::io::orc::INT);
+
+  auto const& out_float_col = out_list_struct_col.child(1);
+  EXPECT_EQ(out_float_col.name(), "float_field");
+  EXPECT_EQ(out_float_col.type_kind(), cudf::io::orc::FLOAT);
+}
+
 CUDF_TEST_PROGRAM_MAIN()

From 2fe06bcf37506992778bfb16147e66f566b32915 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 1 Nov 2022 13:09:09 -0700
Subject: [PATCH 104/202] Leverage rapids_cython for more automated RPATH
 handling (#11996)

This PR leverages a new feature of rapids-cmake to avoid needing to manually set the RPATHs for all extension modules individually, instead just pointing to a directory once and then letting rapids-cmake automatically handle the rest. This approach is a lot less error-prone since developers do not need to keep track of the relative paths in each CMakeLists.txt file.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Robert Maynard (https://github.com/robertmaynard)

URL: https://github.com/rapidsai/cudf/pull/11996
---
 python/cudf/CMakeLists.txt                          | 13 +++++++++----
 python/cudf/cudf/_lib/CMakeLists.txt                |  6 +-----
 python/cudf/cudf/_lib/io/CMakeLists.txt             |  6 +-----
 python/cudf/cudf/_lib/nvtext/CMakeLists.txt         |  6 +-----
 python/cudf/cudf/_lib/strings/CMakeLists.txt        |  6 +-----
 .../cudf/cudf/_lib/strings/convert/CMakeLists.txt   |  6 +-----
 python/cudf/cudf/_lib/strings/split/CMakeLists.txt  |  6 +-----
 python/strings_udf/strings_udf/_lib/CMakeLists.txt  |  4 ----
 8 files changed, 15 insertions(+), 38 deletions(-)

diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt
index 0781a38e6ad..f8eb3af86d7 100644
--- a/python/cudf/CMakeLists.txt
+++ b/python/cudf/CMakeLists.txt
@@ -39,6 +39,8 @@ else()
   set(cudf_FOUND OFF)
 endif()
 
+include(rapids-cython)
+
 if(NOT cudf_FOUND)
   # TODO: This will not be necessary once we upgrade to CMake 3.22, which will pull in the required
   # languages for the C++ project even if this project does not require those languges.
@@ -54,16 +56,19 @@ if(NOT cudf_FOUND)
   add_subdirectory(../../cpp cudf-cpp)
 
   # Since there are multiple subpackages of cudf._lib that require access to libcudf, we place the
-  # library in the _lib/cpp directory as a single source of truth and modify the other rpaths
+  # library in the cudf directory as a single source of truth and modify the other rpaths
   # appropriately.
-  install(TARGETS cudf DESTINATION cudf/_lib/cpp)
+  set(cython_lib_dir cudf)
+  install(TARGETS cudf DESTINATION ${cython_lib_dir})
 endif()
 
-include(rapids-cython)
 rapids_cython_init()
 
 add_subdirectory(cudf/_lib)
 
 include(cmake/Modules/ProtobufHelpers.cmake)
-
 codegen_protoc(cudf/utils/metadata/orc_column_statistics.proto)
+
+if(DEFINED cython_lib_dir)
+  rapids_cython_add_rpath_entries(TARGET cudf PATHS "${cython_lib_dir}")
+endif()
diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt
index 1f6b2069b49..df17b8f2032 100644
--- a/python/cudf/cudf/_lib/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/CMakeLists.txt
@@ -57,7 +57,7 @@ set(linked_libraries cudf::cudf)
 rapids_cython_create_modules(
   CXX
   SOURCE_FILES "${cython_sources}"
-  LINKED_LIBRARIES "${linked_libraries}"
+  LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cudf
 )
 
 # TODO: Finding NumPy currently requires finding Development due to a bug in CMake. This bug was
@@ -69,10 +69,6 @@ foreach(target IN LISTS targets_using_numpy)
   target_include_directories(${target} PRIVATE "${Python_NumPy_INCLUDE_DIRS}")
 endforeach()
 
-foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
-  set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/cpp")
-endforeach()
-
 add_subdirectory(io)
 add_subdirectory(nvtext)
 add_subdirectory(strings)
diff --git a/python/cudf/cudf/_lib/io/CMakeLists.txt b/python/cudf/cudf/_lib/io/CMakeLists.txt
index b12b085fc76..af5ffccd237 100644
--- a/python/cudf/cudf/_lib/io/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/io/CMakeLists.txt
@@ -17,14 +17,10 @@ set(linked_libraries cudf::cudf)
 rapids_cython_create_modules(
   CXX
   SOURCE_FILES "${cython_sources}"
-  LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX io_
+  LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX io_ ASSOCIATED_TARGETS cudf
 )
 
 set(targets_using_numpy io_datasource io_utils)
 foreach(target IN LISTS targets_using_numpy)
   target_include_directories(${target} PRIVATE "${Python_NumPy_INCLUDE_DIRS}")
 endforeach()
-
-foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
-  set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../cpp")
-endforeach()
diff --git a/python/cudf/cudf/_lib/nvtext/CMakeLists.txt b/python/cudf/cudf/_lib/nvtext/CMakeLists.txt
index d96999a077e..3b925fb5548 100644
--- a/python/cudf/cudf/_lib/nvtext/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/nvtext/CMakeLists.txt
@@ -19,9 +19,5 @@ set(linked_libraries cudf::cudf)
 rapids_cython_create_modules(
   CXX
   SOURCE_FILES "${cython_sources}"
-  LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX nvtext_
+  LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX nvtext_ ASSOCIATED_TARGETS cudf
 )
-
-foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
-  set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../cpp")
-endforeach()
diff --git a/python/cudf/cudf/_lib/strings/CMakeLists.txt b/python/cudf/cudf/_lib/strings/CMakeLists.txt
index 8ed5c5e03c1..a5e87a456cb 100644
--- a/python/cudf/cudf/_lib/strings/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/strings/CMakeLists.txt
@@ -38,12 +38,8 @@ set(linked_libraries cudf::cudf)
 rapids_cython_create_modules(
   CXX
   SOURCE_FILES "${cython_sources}"
-  LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_
+  LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ ASSOCIATED_TARGETS cudf
 )
 
-foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
-  set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../cpp")
-endforeach()
-
 add_subdirectory(convert)
 add_subdirectory(split)
diff --git a/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt b/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt
index ea2e3943b5a..434f79d3b5f 100644
--- a/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/strings/convert/CMakeLists.txt
@@ -20,9 +20,5 @@ set(linked_libraries cudf::cudf)
 rapids_cython_create_modules(
   CXX
   SOURCE_FILES "${cython_sources}"
-  LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_
+  LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ ASSOCIATED_TARGETS cudf
 )
-
-foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
-  set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../../cpp")
-endforeach()
diff --git a/python/cudf/cudf/_lib/strings/split/CMakeLists.txt b/python/cudf/cudf/_lib/strings/split/CMakeLists.txt
index 2d23c0d21cb..59a22c06e85 100644
--- a/python/cudf/cudf/_lib/strings/split/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/strings/split/CMakeLists.txt
@@ -18,9 +18,5 @@ set(linked_libraries cudf::cudf)
 rapids_cython_create_modules(
   CXX
   SOURCE_FILES "${cython_sources}"
-  LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_
+  LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX strings_ ASSOCIATED_TARGETS cudf
 )
-
-foreach(cython_module IN LISTS RAPIDS_CYTHON_CREATED_TARGETS)
-  set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/../../cpp")
-endforeach()
diff --git a/python/strings_udf/strings_udf/_lib/CMakeLists.txt b/python/strings_udf/strings_udf/_lib/CMakeLists.txt
index 91069a43891..55a33a050e0 100644
--- a/python/strings_udf/strings_udf/_lib/CMakeLists.txt
+++ b/python/strings_udf/strings_udf/_lib/CMakeLists.txt
@@ -19,7 +19,3 @@ rapids_cython_create_modules(
   SOURCE_FILES "${cython_sources}"
   LINKED_LIBRARIES "${linked_libraries}"
 )
-
-foreach(cython_module IN LISTS _RAPIDS_CYTHON_CREATED_TARGETS)
-  set_target_properties(${cython_module} PROPERTIES INSTALL_RPATH "\$ORIGIN;\$ORIGIN/cpp")
-endforeach()

From 80c238c95a0a748e476f4f8ee40076526296b063 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Tue, 1 Nov 2022 15:42:38 -0500
Subject: [PATCH 105/202] Fix black exclusions. (#12036)

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index a756854eae7..dfd22f33785 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,7 +2,7 @@
 line-length = 79
 target-version = ["py38"]
 include = '\.py?$'
-exclude = '''
+force-exclude = '''
 /(
     thirdparty |
     \.eggs |

From f19bdbca559a29d08b5a8ea9a8ddf9d4723676ac Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Tue, 1 Nov 2022 16:26:43 -0500
Subject: [PATCH 106/202] Remove smart quotes from all docstrings. (#12035)

This PR removes all "smart quotes" from the library by enforcing a pre-commit hook.

Smart quotes typically arise from copying rendered docstrings from Pandas, because Sphinx automatically transforms straight quotes into smart quotes when rendering the docs as HTML. However, the use of smart quotes is undesirable in code, and makes it difficult to do find-replace transformations if straight and smart quotes are mixed.

I have made suggestions to fix this several times before, so I am making the suggestions more permanent and automatically enforceable via a pre-commit style check:
- https://github.com/rapidsai/cudf/pull/12025#discussion_r1008536741
- https://github.com/rapidsai/cudf/pull/9817#discussion_r791988101
- https://github.com/rapidsai/cudf/pull/9571#discussion_r741309852

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/12035
---
 .pre-commit-config.yaml                       | 10 +++++++
 README.md                                     |  2 +-
 docs/cudf/source/user_guide/10min.ipynb       |  2 +-
 .../cudf/source/user_guide/missing-data.ipynb |  4 +--
 python/cudf/cudf/_lib/search.pyx              |  6 ++--
 .../_lib/strings/convert/convert_urls.pyx     |  4 +--
 python/cudf/cudf/_lib/strings/padding.pyx     |  2 +-
 python/cudf/cudf/core/column/string.py        | 30 +++++++++----------
 python/cudf/cudf/core/dataframe.py            | 12 ++++----
 python/cudf/cudf/core/frame.py                | 12 ++++----
 python/cudf/cudf/core/groupby/groupby.py      | 12 ++++----
 python/cudf/cudf/core/index.py                | 10 +++----
 python/cudf/cudf/core/indexed_frame.py        |  6 ++--
 python/cudf/cudf/core/reshape.py              |  2 +-
 python/cudf/cudf/core/series.py               | 12 ++++----
 python/cudf/cudf/testing/testing.py           | 18 +++++------
 python/cudf/cudf/utils/docutils.py            |  2 +-
 python/cudf/cudf/utils/ioutils.py             | 12 ++++----
 python/custreamz/README.md                    |  2 +-
 19 files changed, 85 insertions(+), 75 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 06a69719517..2b52b040672 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -52,6 +52,16 @@ repos:
               - id: clang-format
                 types_or: [c, c++, cuda]
                 args: ["-fallback-style=none", "-style=file", "-i"]
+      - repo: https://github.com/sirosen/texthooks
+        rev: 0.4.0
+        hooks:
+              - id: fix-smartquotes
+                exclude: |
+                  (?x)^(
+                    ^cpp/include/cudf_test/cxxopts.hpp|
+                    ^python/cudf/cudf/tests/data/subword_tokenizer_data/.*|
+                    ^python/cudf/cudf/tests/test_text.py
+                  )
       - repo: local
         hooks:
               - id: no-deprecationwarning
diff --git a/README.md b/README.md
index 641ce1316b3..a013d3a9ea4 100644
--- a/README.md
+++ b/README.md
@@ -50,7 +50,7 @@ For additional examples, browse our complete [API documentation](https://docs.ra
 
 ## Quick Start
 
-Please see the [Demo Docker Repository](https://hub.docker.com/r/rapidsai/rapidsai/), choosing a tag based on the NVIDIA CUDA version you’re running. This provides a ready to run Docker container with example notebooks and data, showcasing how you can utilize cuDF.
+Please see the [Demo Docker Repository](https://hub.docker.com/r/rapidsai/rapidsai/), choosing a tag based on the NVIDIA CUDA version you're running. This provides a ready to run Docker container with example notebooks and data, showcasing how you can utilize cuDF.
 
 ## Installation
 
diff --git a/docs/cudf/source/user_guide/10min.ipynb b/docs/cudf/source/user_guide/10min.ipynb
index ce6c55fe134..870e334c216 100644
--- a/docs/cudf/source/user_guide/10min.ipynb
+++ b/docs/cudf/source/user_guide/10min.ipynb
@@ -15,7 +15,7 @@
     "\n",
     "[Dask](https://dask.org/) is a flexible library for parallel computing in Python that makes scaling out your workflow smooth and simple. On the CPU, Dask uses Pandas to execute operations in parallel on DataFrame partitions.\n",
     "\n",
-    "[Dask-cuDF](https://github.com/rapidsai/cudf/tree/main/python/dask_cudf) extends Dask where necessary to allow its DataFrame partitions to be processed by cuDF GPU DataFrames as opposed to Pandas DataFrames. For instance, when you call dask_cudf.read_csv(...), your cluster’s GPUs do the work of parsing the CSV file(s) with underlying cudf.read_csv().\n",
+    "[Dask-cuDF](https://github.com/rapidsai/cudf/tree/main/python/dask_cudf) extends Dask where necessary to allow its DataFrame partitions to be processed by cuDF GPU DataFrames as opposed to Pandas DataFrames. For instance, when you call dask_cudf.read_csv(...), your cluster's GPUs do the work of parsing the CSV file(s) with underlying cudf.read_csv().\n",
     "\n",
     "\n",
     "### When to use cuDF and Dask-cuDF\n",
diff --git a/docs/cudf/source/user_guide/missing-data.ipynb b/docs/cudf/source/user_guide/missing-data.ipynb
index ad12c675373..ac5bddd34cf 100644
--- a/docs/cudf/source/user_guide/missing-data.ipynb
+++ b/docs/cudf/source/user_guide/missing-data.ipynb
@@ -229,7 +229,7 @@
    "id": "acdf29d7",
    "metadata": {},
    "source": [
-    "One has to be mindful that in Python (and NumPy), the nan's don’t compare equal, but None's do. Note that cudf/NumPy uses the fact that `np.nan != np.nan`, and treats `None` like `np.nan`."
+    "One has to be mindful that in Python (and NumPy), the nan's don't compare equal, but None's do. Note that cudf/NumPy uses the fact that `np.nan != np.nan`, and treats `None` like `np.nan`."
    ]
   },
   {
@@ -279,7 +279,7 @@
    "id": "4fdb8bc7",
    "metadata": {},
    "source": [
-    "So as compared to above, a scalar equality comparison versus a None/np.nan doesn’t provide useful information."
+    "So as compared to above, a scalar equality comparison versus a None/np.nan doesn't provide useful information."
    ]
   },
   {
diff --git a/python/cudf/cudf/_lib/search.pyx b/python/cudf/cudf/_lib/search.pyx
index d5568f53231..b8abe3d0dab 100644
--- a/python/cudf/cudf/_lib/search.pyx
+++ b/python/cudf/cudf/_lib/search.pyx
@@ -24,9 +24,9 @@ def search_sorted(
         List of columns to search in
     values : List of columns
         List of value columns to search for
-    side : str {‘left’, ‘right’} optional
-        If ‘left’, the index of the first suitable location is given.
-        If ‘right’, return the last such index
+    side : str {'left', 'right'} optional
+        If 'left', the index of the first suitable location is given.
+        If 'right', return the last such index
     """
     cdef unique_ptr[column] c_result
     cdef vector[libcudf_types.order] c_column_order
diff --git a/python/cudf/cudf/_lib/strings/convert/convert_urls.pyx b/python/cudf/cudf/_lib/strings/convert/convert_urls.pyx
index c391719e853..8d673de12b8 100644
--- a/python/cudf/cudf/_lib/strings/convert/convert_urls.pyx
+++ b/python/cudf/cudf/_lib/strings/convert/convert_urls.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
@@ -41,7 +41,7 @@ def url_encode(Column source_strings):
     """
     Encode each string in column. No format checking is performed.
     All characters are encoded except for ASCII letters, digits,
-    and these characters: ‘.’,’_’,’-‘,’~’. Encoding converts to
+    and these characters: '.','_','-','~'. Encoding converts to
     hex using UTF-8 encoded bytes.
 
     Parameters
diff --git a/python/cudf/cudf/_lib/strings/padding.pyx b/python/cudf/cudf/_lib/strings/padding.pyx
index 99270b340eb..f53feab7936 100644
--- a/python/cudf/cudf/_lib/strings/padding.pyx
+++ b/python/cudf/cudf/_lib/strings/padding.pyx
@@ -59,7 +59,7 @@ def zfill(Column source_strings,
           size_type width):
     """
     Returns a Column by prepending strings in `source_strings`
-    with ‘0’ characters up to the given `width`.
+    with '0' characters up to the given `width`.
     """
     cdef unique_ptr[column] c_result
     cdef column_view source_view = source_strings.view()
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index c84e4ff4adb..625a9c70873 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -116,8 +116,8 @@ class StringMethods(ColumnMethods):
 
     This mimics pandas ``df.str`` interface. nulls stay null
     unless handled otherwise by a particular method.
-    Patterned after Python’s string methods, with some
-    inspiration from R’s stringr package.
+    Patterned after Python's string methods, with some
+    inspiration from R's stringr package.
     """
 
     _column: StringColumn
@@ -709,7 +709,7 @@ def contains(
         >>> idx.str.contains('23', regex=False)
         GenericIndex([False, False, False, True, <NA>], dtype='bool')
 
-        Returning ‘house’ or ‘dog’ when either expression occurs in a string.
+        Returning 'house' or 'dog' when either expression occurs in a string.
 
         >>> s1.str.contains('house|dog', regex=True)
         0    False
@@ -732,7 +732,7 @@ def contains(
         Ensure ``pat`` is a not a literal pattern when ``regex`` is set
         to True. Note in the following example one might expect
         only `s2[1]` and `s2[3]` to return True. However,
-        ‘.0’ as a regex matches any character followed by a 0.
+        '.0' as a regex matches any character followed by a 0.
 
         >>> s2 = cudf.Series(['40', '40.0', '41', '41.0', '35'])
         >>> s2.str.contains('.0', regex=True)
@@ -2903,7 +2903,7 @@ def pad(
             additional characters will be filled with
             character defined in fillchar.
 
-        side : {‘left’, ‘right’, ‘both’}, default ‘left’
+        side : {'left', 'right', 'both'}, default 'left'
             Side from which to fill resulting string.
 
         fillchar : str,  default ' ' (whitespace)
@@ -2930,7 +2930,7 @@ def pad(
             Equivalent to ``Series.str.pad(side='both')``.
 
         zfill
-            Pad strings in the Series/Index by prepending ‘0’ character.
+            Pad strings in the Series/Index by prepending '0' character.
             Equivalent to ``Series.str.pad(side='left', fillchar='0')``.
 
         Examples
@@ -2970,7 +2970,7 @@ def pad(
             side = libstrings.SideType[side.upper()]
         except KeyError:
             raise ValueError(
-                "side has to be either one of {‘left’, ‘right’, ‘both’}"
+                "side has to be either one of {'left', 'right', 'both'}"
             )
 
         return self._return_or_inplace(
@@ -2979,9 +2979,9 @@ def pad(
 
     def zfill(self, width: int) -> SeriesOrIndex:
         """
-        Pad strings in the Series/Index by prepending ‘0’ characters.
+        Pad strings in the Series/Index by prepending '0' characters.
 
-        Strings in the Series/Index are padded with ‘0’ characters
+        Strings in the Series/Index are padded with '0' characters
         on the left of the string to reach a total string length
         width. Strings in the Series/Index with length greater
         or equal to width are unchanged.
@@ -2994,12 +2994,12 @@ def zfill(self, width: int) -> SeriesOrIndex:
         width : int
             Minimum length of resulting string;
             strings with length less than width
-            be prepended with ‘0’ characters.
+            be prepended with '0' characters.
 
         Returns
         -------
         Series/Index of str dtype
-            Returns Series or Index with prepended ‘0’ characters.
+            Returns Series or Index with prepended '0' characters.
 
         See Also
         --------
@@ -3405,7 +3405,7 @@ def wrap(self, width: int, **kwargs) -> SeriesOrIndex:
         `expand_tabsbool` are not yet supported and will raise a
         NotImplementedError if they are set to any value.
 
-        This method currently achieves behavior matching R’s
+        This method currently achieves behavior matching R's
         stringr library ``str_wrap`` function, the equivalent
         pandas implementation can be obtained using the
         following parameter setting:
@@ -3576,7 +3576,7 @@ def findall(self, pat: str, flags: int = 0) -> SeriesOrIndex:
         >>> import cudf
         >>> s = cudf.Series(['Lion', 'Monkey', 'Rabbit'])
 
-        The search for the pattern ‘Monkey’ returns one match:
+        The search for the pattern 'Monkey' returns one match:
 
         >>> s.str.findall('Monkey')
         0          []
@@ -3595,7 +3595,7 @@ def findall(self, pat: str, flags: int = 0) -> SeriesOrIndex:
 
         Regular expressions are supported too. For instance,
         the search for all the strings ending with
-        the word ‘on’ is shown next:
+        the word 'on' is shown next:
 
         >>> s.str.findall('on$')
         0    [on]
@@ -4228,7 +4228,7 @@ def url_encode(self) -> SeriesOrIndex:
         Returns a URL-encoded format of each string.
         No format checking is performed.
         All characters are encoded except for ASCII letters,
-        digits, and these characters: ``‘.’,’_’,’-‘,’~’``.
+        digits, and these characters: ``'.','_','-','~'``.
         Encoding converts to hex using UTF-8 encoded bytes.
 
         Returns
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 82a4a4a8b65..5c24b222a1b 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -2293,7 +2293,7 @@ def reindex(
             Return a new object, even if the passed indexes are the same.
         level : Not supported
         fill_value : Value to use for missing values.
-            Defaults to ``NA``, but can be any “compatible” value.
+            Defaults to ``NA``, but can be any "compatible" value.
         limit : Not supported
         tolerance : Not supported
 
@@ -2358,7 +2358,7 @@ def reindex(
         IE10               404       <NA>
         Konqueror          301       <NA>
 
-        Or we can use “axis-style” keyword arguments
+        Or we can use "axis-style" keyword arguments
         >>> df.reindex(columns=['http_status', 'user_agent'])
                 http_status user_agent
         Firefox            200       <NA>
@@ -3028,7 +3028,7 @@ def rename(
         """Alter column and index labels.
 
         Function / dict values must be unique (1-to-1). Labels not contained in
-        a dict / Series will be left as-is. Extra labels listed don’t throw an
+        a dict / Series will be left as-is. Extra labels listed don't throw an
         error.
 
         ``DataFrame.rename`` supports two calling conventions:
@@ -3635,8 +3635,8 @@ def merge(
             If on is None and not merging on indexes then
             this defaults to the intersection of the columns
             in both DataFrames.
-        how : {‘left’, ‘outer’, ‘inner’, 'leftsemi', 'leftanti'}, \
-            default ‘inner’
+        how : {'left', 'outer', 'inner', 'leftsemi', 'leftanti'}, \
+            default 'inner'
             Type of merge to be performed.
 
             - left : use only keys from left frame, similar to a SQL left
@@ -5363,7 +5363,7 @@ def isin(self, values):
         ----------
         values : iterable, Series, DataFrame or dict
             The result will only be true at a location if all
-            the labels match. If values is a Series, that’s the index.
+            the labels match. If values is a Series, that's the index.
             If values is a dict, the keys must be the column names,
             which must match. If values is a DataFrame, then both the
             index and column labels must match.
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 12c53ae258d..29d5c9ae26d 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1363,12 +1363,12 @@ def searchsorted(
         ----------
         value : Frame (Shape must be consistent with self)
             Values to be hypothetically inserted into Self
-        side : str {‘left’, ‘right’} optional, default ‘left‘
-            If ‘left’, the index of the first suitable location found is given
-            If ‘right’, return the last such index
+        side : str {'left', 'right'} optional, default 'left'
+            If 'left', the index of the first suitable location found is given
+            If 'right', return the last such index
         ascending : bool optional, default True
             Sorted Frame is in ascending order (otherwise descending)
-        na_position : str {‘last’, ‘first’} optional, default ‘last‘
+        na_position : str {'last', 'first'} optional, default 'last'
             Position of null values in sorted order
 
         Returns
@@ -1476,8 +1476,8 @@ def argsort(
             Has no effect but is accepted for compatibility with numpy.
         ascending : bool or list of bool, default True
             If True, sort values in ascending order, otherwise descending.
-        na_position : {‘first’ or ‘last’}, default ‘last’
-            Argument ‘first’ puts NaNs at the beginning, ‘last’ puts NaNs
+        na_position : {'first' or 'last'}, default 'last'
+            Argument 'first' puts NaNs at the beginning, 'last' puts NaNs
             at the end.
 
         Returns
diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index 0ab64bd985a..e4ea59c1f15 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -52,9 +52,9 @@ def _quantile_75(x):
 ----------
 by : mapping, function, label, or list of labels
     Used to determine the groups for the groupby. If by is a
-    function, it’s called on each value of the object’s index.
+    function, it's called on each value of the object's index.
     If a dict or Series is passed, the Series or dict VALUES will
-    be used to determine the groups (the Series’ values are first
+    be used to determine the groups (the Series' values are first
     aligned; see .align() method). If an cupy array is passed, the
     values are used as-is determine the groups. A label or list
     of labels may be passed to group by the columns in self.
@@ -65,7 +65,7 @@ def _quantile_75(x):
 as_index : bool, default True
     For aggregated output, return object with group labels as
     the index. Only relevant for DataFrame input.
-    as_index=False is effectively “SQL-style” grouped output.
+    as_index=False is effectively "SQL-style" grouped output.
 sort : bool, default False
     Sort result by group key. Differ from Pandas, cudf defaults to
     ``False`` for better performance. Note this does not influence
@@ -717,7 +717,7 @@ def _normalize_aggs(
     def pipe(self, func, *args, **kwargs):
         """
         Apply a function `func` with arguments to this GroupBy
-        object and return the function’s result.
+        object and return the function's result.
 
         Parameters
         ----------
@@ -1103,13 +1103,13 @@ def func(x):
     def describe(self, include=None, exclude=None):
         """
         Generate descriptive statistics that summarizes the central tendency,
-        dispersion and shape of a dataset’s distribution, excluding NaN values.
+        dispersion and shape of a dataset's distribution, excluding NaN values.
 
         Analyzes numeric DataFrames only
 
         Parameters
         ----------
-        include: ‘all’, list-like of dtypes or None (default), optional
+        include: 'all', list-like of dtypes or None (default), optional
             list of data types to include in the result.
             Ignored for Series.
 
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 0628497fc29..fbaa95763a1 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -1062,7 +1062,7 @@ def equals(self, other, **kwargs):
         Returns
         -------
         out: bool
-            True if “other” is an Index and it has the same elements
+            True if "other" is an Index and it has the same elements
             as calling index; False otherwise.
         """
         if (
@@ -1414,8 +1414,8 @@ def argsort(
             Has no effect but is accepted for compatibility with numpy.
         ascending : bool or list of bool, default True
             If True, sort values in ascending order, otherwise descending.
-        na_position : {‘first’ or ‘last’}, default ‘last’
-            Argument ‘first’ puts NaNs at the beginning, ‘last’ puts NaNs
+        na_position : {'first' or 'last'}, default 'last'
+            Argument 'first' puts NaNs at the beginning, 'last' puts NaNs
             at the end.
 
         Returns
@@ -1853,7 +1853,7 @@ class DatetimeIndex(GenericIndex):
         This is not yet supported
     tz : pytz.timezone or dateutil.tz.tzfile
         This is not yet supported
-    ambiguous : ‘infer’, bool-ndarray, ‘NaT’, default ‘raise’
+    ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise'
         This is not yet supported
     name : object
         Name to be stored in the index.
@@ -2547,7 +2547,7 @@ class CategoricalIndex(GenericIndex):
         Whether or not this categorical is treated as an ordered categorical.
         If not given here or in dtype, the resulting categorical will be
         unordered.
-    dtype : CategoricalDtype or “category”, optional
+    dtype : CategoricalDtype or "category", optional
         If CategoricalDtype, cannot be used together with categories or
         ordered.
     copy : bool, default False
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index bbb1c95bef6..57469c0ff72 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -562,8 +562,8 @@ def replace(
             * dict:
                 - Dicts can be used to specify different replacement values
                   for different existing values. For example, {'a': 'b',
-                  'y': 'z'} replaces the value ‘a’ with ‘b’ and
-                  ‘y’ with ‘z’.
+                  'y': 'z'} replaces the value 'a' with 'b' and
+                  'y' with 'z'.
                   To use a dict in this way the ``value`` parameter should
                   be ``None``.
         value : scalar, dict, list-like, str, default None
@@ -1865,7 +1865,7 @@ def sort_values(
             Sort ascending vs. descending. Specify list for multiple sort
             orders. If this is a list of bools, must match the length of the
             by.
-        na_position : {‘first’, ‘last’}, default ‘last’
+        na_position : {'first', 'last'}, default 'last'
             'first' puts nulls at the beginning, 'last' puts nulls at the end
         ignore_index : bool, default False
             If True, index will not be sorted.
diff --git a/python/cudf/cudf/core/reshape.py b/python/cudf/cudf/core/reshape.py
index 8e5d0ece729..ba9da2bcb0c 100644
--- a/python/cudf/cudf/core/reshape.py
+++ b/python/cudf/cudf/core/reshape.py
@@ -484,7 +484,7 @@ def melt(
     4  b        C      4
     5  c        C      6
 
-    The names of ‘variable’ and ‘value’ columns can be customized:
+    The names of 'variable' and 'value' columns can be customized:
 
     >>> cudf.melt(df, id_vars=['A'], value_vars=['B'],
     ...         var_name='myVarname', value_name='myValname')
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 07e1782d788..f9600c84f5e 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -815,7 +815,7 @@ def reindex(self, *args, **kwargs):
         copy : boolean, default True
         level: Not Supported
         fill_value : Value to use for missing values.
-            Defaults to ``NA``, but can be any “compatible” value.
+            Defaults to ``NA``, but can be any "compatible" value.
         limit: Not Supported
         tolerance: Not Supported
 
@@ -1605,7 +1605,7 @@ def drop_duplicates(self, keep="first", inplace=False, ignore_index=False):
         Name: animal, dtype: object
 
         The value `False` for parameter `keep` discards all sets
-        of duplicated entries. Setting the value of ‘inplace’ to
+        of duplicated entries. Setting the value of 'inplace' to
         `True` performs the operation inplace and returns `None`.
 
         >>> s.drop_duplicates(keep=False, inplace=True)
@@ -1881,7 +1881,7 @@ def sort_values(
             Sort ascending vs. descending. Specify list for multiple sort
             orders. If this is a list of bools, must match the length of the
             by.
-        na_position : {‘first’, ‘last’}, default ‘last’
+        na_position : {'first', 'last'}, default 'last'
             'first' puts nulls at the beginning, 'last' puts nulls at the end
         ignore_index : bool, default False
             If True, index will not be sorted.
@@ -2763,7 +2763,7 @@ def value_counts(
             only works with numeric data.
 
         dropna : bool, default True
-            Don’t include counts of NaN and None.
+            Don't include counts of NaN and None.
 
         Returns
         -------
@@ -2886,7 +2886,7 @@ def quantile(
         ----------
         q : float or array-like, default 0.5 (50% quantile)
             0 <= q <= 1, the quantile(s) to compute
-        interpolation : {’linear’, ‘lower’, ‘higher’, ‘midpoint’, ‘nearest’}
+        interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
             This optional parameter specifies the interpolation method to use,
             when the desired quantile lies between two data points i and j:
         columns : list of str
@@ -4352,7 +4352,7 @@ def strftime(self, date_format, *args, **kwargs):
         Parameters
         ----------
         date_format : str
-            Date format string (e.g. “%Y-%m-%d”).
+            Date format string (e.g. "%Y-%m-%d").
 
         Returns
         -------
diff --git a/python/cudf/cudf/testing/testing.py b/python/cudf/cudf/testing/testing.py
index 070e4649c7b..a8428c2647b 100644
--- a/python/cudf/cudf/testing/testing.py
+++ b/python/cudf/cudf/testing/testing.py
@@ -134,7 +134,7 @@ def assert_column_equal(
         right Column to compare
     check_dtype : bool, default True
         Whether to check the Column dtype is identical.
-    check_column_type : bool or {‘equiv’}, default ‘equiv’
+    check_column_type : bool or {'equiv'}, default 'equiv'
         Whether to check the columns class, dtype and
         inferred_type are identical. Currently it is idle,
         and similar to pandas.
@@ -152,7 +152,7 @@ def assert_column_equal(
         Relative tolerance. Only used when `check_exact` is False.
     atol : float, default 1e-8
         Absolute tolerance. Only used when `check_exact` is False.
-    obj : str, default ‘ColumnBase’
+    obj : str, default 'ColumnBase'
         Specify object name being compared, internally used to
         show appropriate assertion message.
     """
@@ -322,9 +322,9 @@ def assert_index_equal(
         left Index to compare
     right : Index
         right Index to compare
-    exact : bool or {‘equiv’}, default ‘equiv’
+    exact : bool or {'equiv'}, default 'equiv'
         Whether to check the Index class, dtype and inferred_type
-        are identical. If ‘equiv’, then RangeIndex can be substituted
+        are identical. If 'equiv', then RangeIndex can be substituted
         for Int8Index, Int16Index, Int32Index, Int64Index as well.
     check_names : bool, default True
         Whether to check the names attribute.
@@ -345,7 +345,7 @@ def assert_index_equal(
         Relative tolerance. Only used when `check_exact` is False.
     atol : float, default 1e-8
         Absolute tolerance. Only used when `check_exact` is False.
-    obj : str, default ‘Index’
+    obj : str, default 'Index'
         Specify object name being compared, internally used to
         show appropriate assertion message.
 
@@ -467,7 +467,7 @@ def assert_series_equal(
         right Series to compare
     check_dtype : bool, default True
         Whether to check the Series dtype is identical.
-    check_index_type : bool or {‘equiv’}, default ‘equiv’
+    check_index_type : bool or {'equiv'}, default 'equiv'
         Whether to check the Index class, dtype and inferred_type
         are identical.
     check_series_type : bool, default True
@@ -491,7 +491,7 @@ def assert_series_equal(
         Relative tolerance. Only used when `check_exact` is False.
     atol : float, default 1e-8
         Absolute tolerance. Only used when `check_exact` is False.
-    obj : str, default ‘Series’
+    obj : str, default 'Series'
         Specify object name being compared, internally used to
         show appropriate assertion message.
 
@@ -600,7 +600,7 @@ def assert_frame_equal(
         right DataFrame to compare
     check_dtype : bool, default True
         Whether to check the DataFrame dtype is identical.
-    check_index_type : bool or {‘equiv’}, default ‘equiv’
+    check_index_type : bool or {'equiv'}, default 'equiv'
         Whether to check the Index class, dtype and inferred_type
         are identical.
     check_column_type : bool, default True
@@ -630,7 +630,7 @@ def assert_frame_equal(
         Relative tolerance. Only used when `check_exact` is False.
     atol : float, default 1e-8
         Absolute tolerance. Only used when `check_exact` is False.
-    obj : str, default ‘DataFrame’
+    obj : str, default 'DataFrame'
         Specify object name being compared, internally used to
         show appropriate assertion message.
 
diff --git a/python/cudf/cudf/utils/docutils.py b/python/cudf/cudf/utils/docutils.py
index 9f04e30fb28..09f0eb05eb6 100644
--- a/python/cudf/cudf/utils/docutils.py
+++ b/python/cudf/cudf/utils/docutils.py
@@ -83,7 +83,7 @@ def wrapper(func):
         Generate descriptive statistics.
 
         Descriptive statistics include those that summarize the
-        central tendency, dispersion and shape of a dataset’s
+        central tendency, dispersion and shape of a dataset's
         distribution, excluding ``NaN`` values.
 
         Analyzes both numeric and object series, as well as
diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py
index ebb73ba0ca6..9146405c6ed 100644
--- a/python/cudf/cudf/utils/ioutils.py
+++ b/python/cudf/cudf/utils/ioutils.py
@@ -515,7 +515,7 @@
     If ``None``, similar to ``True`` the dataframe's index(es) will
     be saved, however, instead of being saved as values any
     ``RangeIndex`` will be stored as a range in the metadata so it
-    doesn’t require much space and is faster. Other indexes will
+    doesn't require much space and is faster. Other indexes will
     be included as columns in the file output.
 
 See Also
@@ -1046,7 +1046,7 @@
     are mapped to the particular type passed. If list, types are applied in
     the same order as the column names. If dict, types are mapped to the
     column names.
-    E.g. {{‘a’: np.float64, ‘b’: int32, ‘c’: ‘float’}}
+    E.g. {{'a': np.float64, 'b': int32, 'c': 'float'}}
     If `None`, dtypes are inferred from the dataset. Use `str` to preserve data
     and not infer or interpret to dtype.
 true_values : list, default None
@@ -1084,9 +1084,9 @@
 dayfirst : bool, default False
     DD/MM format dates, international and European format.
 compression : {{'infer', 'gzip', 'zip', None}}, default 'infer'
-    For on-the-fly decompression of on-disk data. If ‘infer’, then detect
-    compression from the following extensions: ‘.gz’,‘.zip’ (otherwise no
-    decompression). If using ‘zip’, the ZIP file must contain only one
+    For on-the-fly decompression of on-disk data. If 'infer', then detect
+    compression from the following extensions: '.gz','.zip' (otherwise no
+    decompression). If using 'zip', the ZIP file must contain only one
     data file to be read in, otherwise the first non-zero-sized file will
     be used. Set to None for no decompression.
 thousands : char, default None
@@ -1210,7 +1210,7 @@
     Write out the index as a column
 encoding : str, default 'utf-8'
     A string representing the encoding to use in the output file
-    Only ‘utf-8’ is currently supported
+    Only 'utf-8' is currently supported
 compression : str, None
     A string representing the compression scheme to use in the the output file
     Compression while writing csv is not supported currently
diff --git a/python/custreamz/README.md b/python/custreamz/README.md
index 0bddc6473a8..6b105c9ea4a 100644
--- a/python/custreamz/README.md
+++ b/python/custreamz/README.md
@@ -41,7 +41,7 @@ A more detailed example of [parsing haproxy logs](https://github.com/rapidsai-co
 
 ## Quick Start
 
-Please see the [Demo Docker Repository](https://hub.docker.com/r/rapidsai/rapidsai/), choosing a tag based on the NVIDIA CUDA version you’re running. This provides a ready to run Docker container with cuStreamz already installed.
+Please see the [Demo Docker Repository](https://hub.docker.com/r/rapidsai/rapidsai/), choosing a tag based on the NVIDIA CUDA version you're running. This provides a ready to run Docker container with cuStreamz already installed.
 
 ## Installation
 

From 1c2ad6aaa50db7335b7bd661a9e7737a3e679300 Mon Sep 17 00:00:00 2001
From: Vukasin Milovanovic <vmilovanovic@nvidia.com>
Date: Tue, 1 Nov 2022 15:32:51 -0700
Subject: [PATCH 107/202] Fix Parquet support for seconds and milliseconds
 duration types (#11854)

Fixes https://github.com/rapidsai/cudf/issues/11833

Parquet writer used int64 for `second` and `millisecond` durations. This does not match the Parquet spec, which requires int32 to be used here.

Changed the physical type of time_millis to int32 to match the spec.
Set the logical type for time (duration) types.
Using the logical types allows us to write nanosecond durations as nanoseconds, so there is no longer any precision loss.
The Parquet writer option `timestamp_type` no longer applies to durations.

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Bradley Dice (https://github.com/bdice)
  - Yunsong Wang (https://github.com/PointKernel)
  - Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/11854
---
 .../io/parquet/compact_protocol_writer.cpp    |  2 +-
 cpp/src/io/parquet/page_data.cu               | 29 ++++++--
 cpp/src/io/parquet/page_enc.cu                | 21 ++++--
 cpp/src/io/parquet/parquet_gpu.hpp            |  6 +-
 cpp/src/io/parquet/reader_impl.cu             | 13 ++--
 cpp/src/io/parquet/writer_impl.cu             | 43 ++++++-----
 .../statistics_type_identification.cuh        |  3 +-
 cpp/tests/io/parquet_test.cpp                 | 73 ++++++++++++++++++-
 python/cudf/cudf/tests/test_parquet.py        | 56 ++++++++++++++
 9 files changed, 198 insertions(+), 48 deletions(-)

diff --git a/cpp/src/io/parquet/compact_protocol_writer.cpp b/cpp/src/io/parquet/compact_protocol_writer.cpp
index 28baad9c51c..f5ae262fa3f 100644
--- a/cpp/src/io/parquet/compact_protocol_writer.cpp
+++ b/cpp/src/io/parquet/compact_protocol_writer.cpp
@@ -150,7 +150,7 @@ size_t CompactProtocolWriter::write(const SchemaElement& s)
   // if (isset.STRING or isset.MAP or isset.LIST or isset.ENUM or isset.DECIMAL or isset.DATE or
   //    isset.TIME or isset.TIMESTAMP or isset.INTEGER or isset.UNKNOWN or isset.JSON or isset.BSON)
   //    {
-  if (isset.TIMESTAMP) { c.field_struct(10, s.logical_type); }
+  if (isset.TIMESTAMP or isset.TIME) { c.field_struct(10, s.logical_type); }
   return c.value();
 }
 
diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu
index 6c314261a13..b36826002f4 100644
--- a/cpp/src/io/parquet/page_data.cu
+++ b/cpp/src/io/parquet/page_data.cu
@@ -876,15 +876,15 @@ static __device__ bool setupLocalPageInfo(page_state_s* const s,
         case BOOLEAN:
           s->dtype_len = 1;  // Boolean are stored as 1 byte on the output
           break;
-        case INT32:
+        case INT32: [[fallthrough]];
         case FLOAT: s->dtype_len = 4; break;
         case INT64:
           if (s->col.ts_clock_rate) {
             int32_t units = 0;
-            if (s->col.converted_type == TIME_MILLIS or s->col.converted_type == TIMESTAMP_MILLIS) {
+            // Duration types are not included because no scaling is done when reading
+            if (s->col.converted_type == TIMESTAMP_MILLIS) {
               units = cudf::timestamp_ms::period::den;
-            } else if (s->col.converted_type == TIME_MICROS or
-                       s->col.converted_type == TIMESTAMP_MICROS) {
+            } else if (s->col.converted_type == TIMESTAMP_MICROS) {
               units = cudf::timestamp_us::period::den;
             } else if (s->col.logical_type.TIMESTAMP.unit.isset.NANOS) {
               units = cudf::timestamp_ns::period::den;
@@ -894,7 +894,7 @@ static __device__ bool setupLocalPageInfo(page_state_s* const s,
                                                            : (s->col.ts_clock_rate / units);
             }
           }
-          // Fall through to DOUBLE
+          [[fallthrough]];
         case DOUBLE: s->dtype_len = 8; break;
         case INT96: s->dtype_len = 12; break;
         case BYTE_ARRAY: s->dtype_len = sizeof(string_index_pair); break;
@@ -910,8 +910,16 @@ static __device__ bool setupLocalPageInfo(page_state_s* const s,
                        : s->dtype_len <= sizeof(int64_t) ? sizeof(int64_t)
                                                          : sizeof(__int128_t);
       } else if (data_type == INT32) {
-        if (dtype_len_out == 1) s->dtype_len = 1;  // INT8 output
-        if (dtype_len_out == 2) s->dtype_len = 2;  // INT16 output
+        if (dtype_len_out == 1) {
+          // INT8 output
+          s->dtype_len = 1;
+        } else if (dtype_len_out == 2) {
+          // INT16 output
+          s->dtype_len = 2;
+        } else if (s->col.converted_type == TIME_MILLIS) {
+          // INT64 output
+          s->dtype_len = 8;
+        }
       } else if (data_type == BYTE_ARRAY && dtype_len_out == 4) {
         s->dtype_len = 4;  // HASH32 output
       } else if (data_type == INT96) {
@@ -1670,7 +1678,12 @@ __global__ void __launch_bounds__(block_size) gpuDecodePageData(
         } else if (dtype == INT96) {
           gpuOutputInt96Timestamp(s, val_src_pos, static_cast<int64_t*>(dst));
         } else if (dtype_len == 8) {
-          if (s->ts_scale) {
+          if (s->dtype_len_in == 4) {
+            // Reading INT32 TIME_MILLIS into 64-bit DURATION_MILLISECONDS
+            // TIME_MILLIS is the only duration type stored as int32:
+            // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#deprecated-time-convertedtype
+            gpuOutputFast(s, val_src_pos, static_cast<uint32_t*>(dst));
+          } else if (s->ts_scale) {
             gpuOutputInt64Timestamp(s, val_src_pos, static_cast<int64_t*>(dst));
           } else {
             gpuOutputFast(s, val_src_pos, static_cast<uint2*>(dst));
diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu
index 15bd4fe17e3..8a07ee419b4 100644
--- a/cpp/src/io/parquet/page_enc.cu
+++ b/cpp/src/io/parquet/page_enc.cu
@@ -1109,15 +1109,20 @@ __global__ void __launch_bounds__(128, 8)
       if (t == 0) { s->cur = dst + total_len; }
       if (is_valid) {
         switch (physical_type) {
-          case INT32:
+          case INT32: [[fallthrough]];
           case FLOAT: {
-            int32_t v;
-            if (dtype_len_in == 4)
-              v = s->col.leaf_column->element<int32_t>(val_idx);
-            else if (dtype_len_in == 2)
-              v = s->col.leaf_column->element<int16_t>(val_idx);
-            else
-              v = s->col.leaf_column->element<int8_t>(val_idx);
+            auto const v = [dtype_len = dtype_len_in,
+                            idx       = val_idx,
+                            col       = s->col.leaf_column,
+                            scale     = s->col.ts_scale == 0 ? 1 : s->col.ts_scale]() -> int32_t {
+              switch (dtype_len) {
+                case 8: return col->element<int64_t>(idx) * scale;
+                case 4: return col->element<int32_t>(idx) * scale;
+                case 2: return col->element<int16_t>(idx) * scale;
+                default: return col->element<int8_t>(idx) * scale;
+              }
+            }();
+
             dst[pos + 0] = v;
             dst[pos + 1] = v >> 8;
             dst[pos + 2] = v >> 16;
diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp
index c31a1531fa7..ea3678129ac 100644
--- a/cpp/src/io/parquet/parquet_gpu.hpp
+++ b/cpp/src/io/parquet/parquet_gpu.hpp
@@ -299,10 +299,12 @@ constexpr size_t kDictScratchSize    = (1 << kDictHashBits) * sizeof(uint32_t);
 inline uint32_t __device__ int32_logical_len(type_id id)
 {
   switch (id) {
-    case cudf::type_id::INT8:
+    case cudf::type_id::INT8: [[fallthrough]];
     case cudf::type_id::UINT8: return 1;
-    case cudf::type_id::INT16:
+    case cudf::type_id::INT16: [[fallthrough]];
     case cudf::type_id::UINT16: return 2;
+    case cudf::type_id::DURATION_SECONDS: [[fallthrough]];
+    case cudf::type_id::DURATION_MILLISECONDS: return 8;
     default: return 4;
   }
 }
diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu
index 535641654a2..50893ebe583 100644
--- a/cpp/src/io/parquet/reader_impl.cu
+++ b/cpp/src/io/parquet/reader_impl.cu
@@ -130,12 +130,8 @@ type_id to_type_id(SchemaElement const& schema,
     case parquet::UINT_32: return type_id::UINT32;
     case parquet::UINT_64: return type_id::UINT64;
     case parquet::DATE: return type_id::TIMESTAMP_DAYS;
-    case parquet::TIME_MILLIS:
-      return (timestamp_type_id != type_id::EMPTY) ? timestamp_type_id
-                                                   : type_id::DURATION_MILLISECONDS;
-    case parquet::TIME_MICROS:
-      return (timestamp_type_id != type_id::EMPTY) ? timestamp_type_id
-                                                   : type_id::DURATION_MICROSECONDS;
+    case parquet::TIME_MILLIS: return type_id::DURATION_MILLISECONDS;
+    case parquet::TIME_MICROS: return type_id::DURATION_MICROSECONDS;
     case parquet::TIMESTAMP_MILLIS:
       return (timestamp_type_id != type_id::EMPTY) ? timestamp_type_id
                                                    : type_id::TIMESTAMP_MILLISECONDS;
@@ -173,6 +169,11 @@ type_id to_type_id(SchemaElement const& schema,
                                                  : type_id::TIMESTAMP_NANOSECONDS;
   }
 
+  if (inferred_converted_type == parquet::UNKNOWN and physical == parquet::INT64 and
+      logical_type.TIME.unit.isset.NANOS) {
+    return type_id::DURATION_NANOSECONDS;
+  }
+
   // is it simply a struct?
   if (schema.is_struct()) { return type_id::STRUCT; }
 
diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu
index f2089d27a87..a49dbcc703c 100644
--- a/cpp/src/io/parquet/writer_impl.cu
+++ b/cpp/src/io/parquet/writer_impl.cu
@@ -374,44 +374,53 @@ struct leaf_schema_fn {
   template <typename T>
   std::enable_if_t<std::is_same_v<T, cudf::duration_D>, void> operator()()
   {
-    col_schema.type           = Type::INT32;
-    col_schema.converted_type = ConvertedType::TIME_MILLIS;
-    col_schema.stats_dtype    = statistics_dtype::dtype_int64;
+    col_schema.type                                = Type::INT32;
+    col_schema.converted_type                      = ConvertedType::TIME_MILLIS;
+    col_schema.stats_dtype                         = statistics_dtype::dtype_int32;
+    col_schema.ts_scale                            = 24 * 60 * 60 * 1000;
+    col_schema.logical_type.isset.TIME             = true;
+    col_schema.logical_type.TIME.unit.isset.MILLIS = true;
   }
 
   template <typename T>
   std::enable_if_t<std::is_same_v<T, cudf::duration_s>, void> operator()()
   {
-    col_schema.type           = Type::INT64;
-    col_schema.converted_type = ConvertedType::TIME_MILLIS;
-    col_schema.stats_dtype    = statistics_dtype::dtype_int64;
-    col_schema.ts_scale       = 1000;
+    col_schema.type                                = Type::INT32;
+    col_schema.converted_type                      = ConvertedType::TIME_MILLIS;
+    col_schema.stats_dtype                         = statistics_dtype::dtype_int32;
+    col_schema.ts_scale                            = 1000;
+    col_schema.logical_type.isset.TIME             = true;
+    col_schema.logical_type.TIME.unit.isset.MILLIS = true;
   }
 
   template <typename T>
   std::enable_if_t<std::is_same_v<T, cudf::duration_ms>, void> operator()()
   {
-    col_schema.type           = Type::INT64;
-    col_schema.converted_type = ConvertedType::TIME_MILLIS;
-    col_schema.stats_dtype    = statistics_dtype::dtype_int64;
+    col_schema.type                                = Type::INT32;
+    col_schema.converted_type                      = ConvertedType::TIME_MILLIS;
+    col_schema.stats_dtype                         = statistics_dtype::dtype_int32;
+    col_schema.logical_type.isset.TIME             = true;
+    col_schema.logical_type.TIME.unit.isset.MILLIS = true;
   }
 
   template <typename T>
   std::enable_if_t<std::is_same_v<T, cudf::duration_us>, void> operator()()
   {
-    col_schema.type           = Type::INT64;
-    col_schema.converted_type = ConvertedType::TIME_MICROS;
-    col_schema.stats_dtype    = statistics_dtype::dtype_int64;
+    col_schema.type                                = Type::INT64;
+    col_schema.converted_type                      = ConvertedType::TIME_MICROS;
+    col_schema.stats_dtype                         = statistics_dtype::dtype_int64;
+    col_schema.logical_type.isset.TIME             = true;
+    col_schema.logical_type.TIME.unit.isset.MICROS = true;
   }
 
   //  unsupported outside cudf for parquet 1.0.
   template <typename T>
   std::enable_if_t<std::is_same_v<T, cudf::duration_ns>, void> operator()()
   {
-    col_schema.type           = Type::INT64;
-    col_schema.converted_type = ConvertedType::TIME_MICROS;
-    col_schema.stats_dtype    = statistics_dtype::dtype_int64;
-    col_schema.ts_scale       = -1000;  // negative value indicates division by absolute value
+    col_schema.type                               = Type::INT64;
+    col_schema.stats_dtype                        = statistics_dtype::dtype_int64;
+    col_schema.logical_type.isset.TIME            = true;
+    col_schema.logical_type.TIME.unit.isset.NANOS = true;
   }
 
   template <typename T>
diff --git a/cpp/src/io/statistics/statistics_type_identification.cuh b/cpp/src/io/statistics/statistics_type_identification.cuh
index 10a7518aefa..9fc30c625aa 100644
--- a/cpp/src/io/statistics/statistics_type_identification.cuh
+++ b/cpp/src/io/statistics/statistics_type_identification.cuh
@@ -74,8 +74,7 @@ struct conversion_map<io_file_format::PARQUET, is_int96_timestamp::YES> {
 template <>
 struct conversion_map<io_file_format::PARQUET, is_int96_timestamp::NO> {
   using types = std::tuple<std::pair<cudf::timestamp_s, cudf::timestamp_ms>,
-                           std::pair<cudf::duration_s, cudf::duration_ms>,
-                           std::pair<cudf::duration_ns, cudf::duration_us>>;
+                           std::pair<cudf::duration_s, cudf::duration_ms>>;
 };
 
 /**
diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp
index 9bb2aa207e4..ba457e2738d 100644
--- a/cpp/tests/io/parquet_test.cpp
+++ b/cpp/tests/io/parquet_test.cpp
@@ -33,6 +33,7 @@
 #include <cudf/strings/strings_column_view.hpp>
 #include <cudf/table/table.hpp>
 #include <cudf/table/table_view.hpp>
+#include <cudf/unary.hpp>
 #include <cudf/utilities/span.hpp>
 
 #include <src/io/parquet/compact_protocol_reader.hpp>
@@ -43,6 +44,7 @@
 #include <thrust/iterator/counting_iterator.h>
 
 #include <fstream>
+#include <random>
 #include <type_traits>
 
 template <typename T, typename SourceElementT = T>
@@ -462,7 +464,70 @@ TYPED_TEST(ParquetWriterNumericTypeTest, SingleColumnWithNulls)
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view());
 }
 
-TYPED_TEST(ParquetWriterChronoTypeTest, Chronos)
+template <typename mask_op_t>
+void test_durations(mask_op_t mask_op)
+{
+  std::default_random_engine generator;
+  std::uniform_int_distribution<int> distribution_d(0, 30);
+  auto sequence_d = cudf::detail::make_counting_transform_iterator(
+    0, [&](auto i) { return distribution_d(generator); });
+
+  std::uniform_int_distribution<int> distribution_s(0, 86400);
+  auto sequence_s = cudf::detail::make_counting_transform_iterator(
+    0, [&](auto i) { return distribution_s(generator); });
+
+  std::uniform_int_distribution<int> distribution(0, 86400 * 1000);
+  auto sequence = cudf::detail::make_counting_transform_iterator(
+    0, [&](auto i) { return distribution(generator); });
+
+  auto mask = cudf::detail::make_counting_transform_iterator(0, mask_op);
+
+  constexpr auto num_rows = 100;
+  // Durations longer than a day are not exactly valid, but cudf should be able to round trip
+  auto durations_d = cudf::test::fixed_width_column_wrapper<cudf::duration_D, int64_t>(
+    sequence_d, sequence_d + num_rows, mask);
+  auto durations_s = cudf::test::fixed_width_column_wrapper<cudf::duration_s, int64_t>(
+    sequence_s, sequence_s + num_rows, mask);
+  auto durations_ms = cudf::test::fixed_width_column_wrapper<cudf::duration_ms, int64_t>(
+    sequence, sequence + num_rows, mask);
+  auto durations_us = cudf::test::fixed_width_column_wrapper<cudf::duration_us, int64_t>(
+    sequence, sequence + num_rows, mask);
+  auto durations_ns = cudf::test::fixed_width_column_wrapper<cudf::duration_ns, int64_t>(
+    sequence, sequence + num_rows, mask);
+
+  auto expected = table_view{{durations_d, durations_s, durations_ms, durations_us, durations_ns}};
+
+  auto filepath = temp_env->get_temp_filepath("Durations.parquet");
+  cudf::io::parquet_writer_options out_opts =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected);
+  cudf::io::write_parquet(out_opts);
+
+  cudf::io::parquet_reader_options in_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath});
+  auto result = cudf::io::read_parquet(in_opts);
+
+  auto durations_d_got =
+    cudf::cast(result.tbl->view().column(0), cudf::data_type{cudf::type_id::DURATION_DAYS});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(durations_d, durations_d_got->view());
+
+  auto durations_s_got =
+    cudf::cast(result.tbl->view().column(1), cudf::data_type{cudf::type_id::DURATION_SECONDS});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(durations_s, durations_s_got->view());
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(durations_ms, result.tbl->view().column(2));
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(durations_us, result.tbl->view().column(3));
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(durations_ns, result.tbl->view().column(4));
+}
+
+TEST_F(ParquetWriterTest, Durations)
+{
+  test_durations([](auto i) { return true; });
+  test_durations([](auto i) { return (i % 2) != 0; });
+  test_durations([](auto i) { return (i % 3) != 0; });
+  test_durations([](auto i) { return false; });
+}
+
+TYPED_TEST(ParquetWriterTimestampTypeTest, Timestamps)
 {
   auto sequence = cudf::detail::make_counting_transform_iterator(
     0, [](auto i) { return ((std::rand() / 10000) * 1000); });
@@ -474,7 +539,7 @@ TYPED_TEST(ParquetWriterChronoTypeTest, Chronos)
 
   auto expected = table_view{{col}};
 
-  auto filepath = temp_env->get_temp_filepath("Chronos.parquet");
+  auto filepath = temp_env->get_temp_filepath("Timestamps.parquet");
   cudf::io::parquet_writer_options out_opts =
     cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected);
   cudf::io::write_parquet(out_opts);
@@ -487,7 +552,7 @@ TYPED_TEST(ParquetWriterChronoTypeTest, Chronos)
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view());
 }
 
-TYPED_TEST(ParquetWriterChronoTypeTest, ChronosWithNulls)
+TYPED_TEST(ParquetWriterTimestampTypeTest, TimestampsWithNulls)
 {
   auto sequence = cudf::detail::make_counting_transform_iterator(
     0, [](auto i) { return ((std::rand() / 10000) * 1000); });
@@ -500,7 +565,7 @@ TYPED_TEST(ParquetWriterChronoTypeTest, ChronosWithNulls)
 
   auto expected = table_view{{col}};
 
-  auto filepath = temp_env->get_temp_filepath("ChronosWithNulls.parquet");
+  auto filepath = temp_env->get_temp_filepath("TimestampsWithNulls.parquet");
   cudf::io::parquet_writer_options out_opts =
     cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected);
   cudf::io::write_parquet(out_opts);
diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py
index 2ac1dfda344..d3eceeddc10 100644
--- a/python/cudf/cudf/tests/test_parquet.py
+++ b/python/cudf/cudf/tests/test_parquet.py
@@ -2280,6 +2280,8 @@ def test_parquet_writer_statistics(tmpdir, pdf, add_nulls):
         # pandas which interferes with series.max()/min()
         for t in TIMEDELTA_TYPES:
             pdf["col_" + t] = pd.Series(np.arange(len(pdf.index))).astype(t)
+        # pyarrow can't read values with non-zero nanoseconds
+        pdf["col_timedelta64[ns]"] = pdf["col_timedelta64[ns]"] * 1000
 
     gdf = cudf.from_pandas(pdf)
     if add_nulls:
@@ -2662,3 +2664,57 @@ def test_parquet_writer_zstd():
     else:
         got = pd.read_orc(buff)
         assert_eq(expected, got)
+
+
+def test_parquet_writer_time_delta_physical_type():
+    df = cudf.DataFrame(
+        {
+            "s": cudf.Series([1], dtype="timedelta64[s]"),
+            "ms": cudf.Series([2], dtype="timedelta64[ms]"),
+            "us": cudf.Series([3], dtype="timedelta64[us]"),
+            # 4K because Pandas/pyarrow don't support non-zero nanoseconds
+            # in Parquet files
+            "ns": cudf.Series([4000], dtype="timedelta64[ns]"),
+        }
+    )
+    buffer = BytesIO()
+    df.to_parquet(buffer)
+
+    got = pd.read_parquet(buffer)
+    expected = pd.DataFrame(
+        {
+            "s": ["00:00:01"],
+            "ms": ["00:00:00.002000"],
+            "us": ["00:00:00.000003"],
+            "ns": ["00:00:00.000004"],
+        },
+        dtype="str",
+    )
+    assert_eq(got.astype("str"), expected)
+
+
+def test_parquet_roundtrip_time_delta():
+    num_rows = 12345
+    df = cudf.DataFrame(
+        {
+            "s": cudf.Series(
+                random.sample(range(0, 200000), num_rows),
+                dtype="timedelta64[s]",
+            ),
+            "ms": cudf.Series(
+                random.sample(range(0, 200000), num_rows),
+                dtype="timedelta64[ms]",
+            ),
+            "us": cudf.Series(
+                random.sample(range(0, 200000), num_rows),
+                dtype="timedelta64[us]",
+            ),
+            "ns": cudf.Series(
+                random.sample(range(0, 200000), num_rows),
+                dtype="timedelta64[ns]",
+            ),
+        }
+    )
+    buffer = BytesIO()
+    df.to_parquet(buffer)
+    assert_eq(df, cudf.read_parquet(buffer))

From ac3f20542ef6f7d2942015c27292878314b7bdbd Mon Sep 17 00:00:00 2001
From: Robert Maynard <rmaynard@nvidia.com>
Date: Tue, 1 Nov 2022 18:49:21 -0400
Subject: [PATCH 108/202] Port thrust's pinned_allocator to cudf, since Thrust
 1.17 removes the type (#12004)

Thrust 1.17 removes the experimental/pinned_allocator. Thrust offers a replacement in `thrust::system::cuda::universal_host_pinned_memory_resource`, but adopting it would require moving the consumers to CUDA sources, which would negatively impact our compile time.

Instead we move Thrust's removed pinned_allocator into cudf, as it allows usage from C++ sources and doesn't
require larger changes to handle the fact that the value_type from the container becomes `thrust::pointer<T>`.

Note: We haven't seen a compile failure up to this point because all CUDA 11.X toolkits provide a version
of Thrust that has the experimental header. So when it wasn't found in our 1.17.2 location, the compiler would fall back
to the one in the CTK. We can't rely on this behavior moving forward.

Authors:
  - Robert Maynard (https://github.com/robertmaynard)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Tobias Ribizel (https://github.com/upsj)
  - David Wendt (https://github.com/davidwendt)
  - Jake Hemstad (https://github.com/jrhemstad)
  - Mark Sadang (https://github.com/msadang)

URL: https://github.com/rapidsai/cudf/pull/12004
---
 conda/recipes/libcudf/meta.yaml               |   3 +-
 cpp/benchmarks/io/text/multibyte_split.cpp    |   9 +-
 .../detail/utilities/pinned_allocator.hpp     | 202 ++++++++++++++++++
 cpp/include/cudf/utilities/span.hpp           |   4 +-
 cpp/src/io/text/bgzip_data_chunk_source.cu    |   5 +-
 .../io/text/data_chunk_source_factories.cpp   |   6 +-
 cpp/src/io/utilities/hostdevice_vector.hpp    |   4 +-
 7 files changed, 217 insertions(+), 16 deletions(-)
 create mode 100644 cpp/include/cudf/detail/utilities/pinned_allocator.hpp

diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml
index 0687e76a356..1d0153c94be 100644
--- a/conda/recipes/libcudf/meta.yaml
+++ b/conda/recipes/libcudf/meta.yaml
@@ -114,9 +114,10 @@ outputs:
         - test -f $PREFIX/include/cudf/detail/unary.hpp
         - test -f $PREFIX/include/cudf/detail/utilities/alignment.hpp
         - test -f $PREFIX/include/cudf/detail/utilities/default_stream.hpp
-        - test -f $PREFIX/include/cudf/detail/utilities/linked_column.hpp
         - test -f $PREFIX/include/cudf/detail/utilities/int_fastdiv.h
         - test -f $PREFIX/include/cudf/detail/utilities/integer_utils.hpp
+        - test -f $PREFIX/include/cudf/detail/utilities/linked_column.hpp
+        - test -f $PREFIX/include/cudf/detail/utilities/pinned_allocator.hpp
         - test -f $PREFIX/include/cudf/detail/utilities/vector_factories.hpp
         - test -f $PREFIX/include/cudf/detail/utilities/visitor_overload.hpp
         - test -f $PREFIX/include/cudf/dictionary/detail/concatenate.hpp
diff --git a/cpp/benchmarks/io/text/multibyte_split.cpp b/cpp/benchmarks/io/text/multibyte_split.cpp
index 56ac4d4ab73..75db8e36689 100644
--- a/cpp/benchmarks/io/text/multibyte_split.cpp
+++ b/cpp/benchmarks/io/text/multibyte_split.cpp
@@ -23,6 +23,7 @@
 #include <cudf_test/file_utilities.hpp>
 
 #include <cudf/column/column_factories.hpp>
+#include <cudf/detail/utilities/pinned_allocator.hpp>
 #include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/io/text/data_chunk_source_factories.hpp>
 #include <cudf/io/text/detail/bgzip_utils.hpp>
@@ -33,7 +34,6 @@
 #include <cudf/utilities/default_stream.hpp>
 
 #include <thrust/host_vector.h>
-#include <thrust/system/cuda/experimental/pinned_allocator.h>
 #include <thrust/transform.h>
 
 #include <nvbench/nvbench.cuh>
@@ -136,10 +136,9 @@ static void bench_multibyte_split(nvbench::state& state,
 
   auto const delim_factor = static_cast<double>(delim_percent) / 100;
   std::unique_ptr<cudf::io::datasource> datasource;
-  auto device_input = create_random_input(file_size_approx, delim_factor, 0.05, delim);
-  auto host_input   = std::vector<char>{};
-  auto host_pinned_input =
-    thrust::host_vector<char, thrust::system::cuda::experimental::pinned_allocator<char>>{};
+  auto device_input      = create_random_input(file_size_approx, delim_factor, 0.05, delim);
+  auto host_input        = std::vector<char>{};
+  auto host_pinned_input = thrust::host_vector<char, cudf::detail::pinned_allocator<char>>{};
 
   if (source_type != data_chunk_source_type::device &&
       source_type != data_chunk_source_type::host_pinned) {
diff --git a/cpp/include/cudf/detail/utilities/pinned_allocator.hpp b/cpp/include/cudf/detail/utilities/pinned_allocator.hpp
new file mode 100644
index 00000000000..84abf7c014f
--- /dev/null
+++ b/cpp/include/cudf/detail/utilities/pinned_allocator.hpp
@@ -0,0 +1,202 @@
+/*
+ *  Copyright 2008-2022 NVIDIA Corporation
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+
+#pragma once
+
+#include <cstddef>
+#include <limits>
+#include <new>  // for bad_alloc
+
+#include <cudf/utilities/error.hpp>
+
+namespace cudf::detail {
+
+/*! \p pinned_allocator is a CUDA-specific host memory allocator
+ *  that employs \c cudaMallocHost for allocation.
+ *
+ * This implementation is ported from the experimental/pinned_allocator
+ * that Thrust used to provide.
+ *
+ *  \see https://en.cppreference.com/w/cpp/memory/allocator
+ */
+template <typename T>
+class pinned_allocator;
+
+/*! \p pinned_allocator is a CUDA-specific host memory allocator
+ *  that employs \c cudaMallocHost for allocation.
+ *
+ * This implementation is ported from the experimental/pinned_allocator
+ * that Thrust used to provide.
+ *
+ *  \see https://en.cppreference.com/w/cpp/memory/allocator
+ */
+template <>
+class pinned_allocator<void> {
+ public:
+  using value_type      = void;            ///< The type of the elements in the allocator
+  using pointer         = void*;           ///< The type returned by address() / allocate()
+  using const_pointer   = const void*;     ///< The type returned by address()
+  using size_type       = std::size_t;     ///< The type used for the size of the allocation
+  using difference_type = std::ptrdiff_t;  ///< The type of the distance between two pointers
+
+  /**
+   * @brief converts a `pinned_allocator<void>` to `pinned_allocator<U>`
+   */
+  template <typename U>
+  struct rebind {
+    using other = pinned_allocator<U>;  ///< The rebound type
+  };
+};
+
+/*! \p pinned_allocator is a CUDA-specific host memory allocator
+ *  that employs \c cudaMallocHost for allocation.
+ *
+ * This implementation is ported from the experimental/pinned_allocator
+ * that Thrust used to provide.
+ *
+ *  \see https://en.cppreference.com/w/cpp/memory/allocator
+ */
+template <typename T>
+class pinned_allocator {
+ public:
+  using value_type      = T;               ///< The type of the elements in the allocator
+  using pointer         = T*;              ///< The type returned by address() / allocate()
+  using const_pointer   = const T*;        ///< The type returned by address()
+  using reference       = T&;              ///< The parameter type for address()
+  using const_reference = const T&;        ///< The parameter type for address()
+  using size_type       = std::size_t;     ///< The type used for the size of the allocation
+  using difference_type = std::ptrdiff_t;  ///< The type of the distance between two pointers
+
+  /**
+   * @brief converts a `pinned_allocator<T>` to `pinned_allocator<U>`
+   */
+  template <typename U>
+  struct rebind {
+    using other = pinned_allocator<U>;  ///< The rebound type
+  };
+
+  /**
+   * @brief pinned_allocator's null constructor does nothing.
+   */
+  __host__ __device__ inline pinned_allocator() {}
+
+  /**
+   * @brief pinned_allocator's null destructor does nothing.
+   */
+  __host__ __device__ inline ~pinned_allocator() {}
+
+  /**
+   * @brief pinned_allocator's copy constructor does nothing.
+   */
+  __host__ __device__ inline pinned_allocator(pinned_allocator const&) {}
+
+  /**
+   * @brief  pinned_allocator's copy constructor does nothing.
+   *
+   *  This version of pinned_allocator's copy constructor
+   *  is templated on the \c value_type of the pinned_allocator
+   *  to copy from.  It is provided merely for convenience; it
+   *  does nothing.
+   */
+  template <typename U>
+  __host__ __device__ inline pinned_allocator(pinned_allocator<U> const&)
+  {
+  }
+
+  /**
+   * @brief This method returns the address of a \c reference of
+   *  interest.
+   *
+   *  @param r The \c reference of interest.
+   *  @return \c r's address.
+   */
+  __host__ __device__ inline pointer address(reference r) { return &r; }
+
+  /**
+   * @brief This method returns the address of a \c const_reference
+   *  of interest.
+   *
+   *  @param r The \c const_reference of interest.
+   *  @return \c r's address.
+   */
+  __host__ __device__ inline const_pointer address(const_reference r) { return &r; }
+
+  /**
+   * @brief This method allocates storage for objects in pinned host
+   *  memory.
+   *
+   *  @param cnt The number of objects to allocate.
+   *  @return a \c pointer to the newly allocated objects.
+   *  @note The second parameter to this function is meant as a
+   *        hint pointer to a nearby memory location, but is
+   *        not used by this allocator.
+   *  @note This method does not invoke \p value_type's constructor.
+   *        It is the responsibility of the caller to initialize the
+   *        objects at the returned \c pointer.
+   */
+  __host__ inline pointer allocate(size_type cnt, const_pointer /*hint*/ = 0)
+  {
+    if (cnt > this->max_size()) { throw std::bad_alloc(); }  // end if
+
+    pointer result(0);
+    CUDF_CUDA_TRY(cudaMallocHost(reinterpret_cast<void**>(&result), cnt * sizeof(value_type)));
+    return result;
+  }
+
+  /**
+   * @brief This method deallocates pinned host memory previously allocated
+   *  with this \c pinned_allocator.
+   *
+   *  @param p A \c pointer to the previously allocated memory.
+   *  @note The second parameter is the number of objects previously allocated
+   *        but is ignored by this allocator.
+   *  @note This method does not invoke \p value_type's destructor.
+   *        It is the responsibility of the caller to destroy
+   *        the objects stored at \p p.
+   */
+  __host__ inline void deallocate(pointer p, size_type /*cnt*/) { CUDF_CUDA_TRY(cudaFreeHost(p)); }
+
+  /**
+   * @brief This method returns the maximum size of the \c cnt parameter
+   *  accepted by the \p allocate() method.
+   *
+   *  @return The maximum number of objects that may be allocated
+   *          by a single call to \p allocate().
+   */
+  inline size_type max_size() const { return (std::numeric_limits<size_type>::max)() / sizeof(T); }
+
+  /**
+   * @brief This method tests this \p pinned_allocator for equality to
+   *  another.
+   *
+   *  @param x The other \p pinned_allocator of interest.
+   *  @return This method always returns \c true.
+   */
+  __host__ __device__ inline bool operator==(pinned_allocator const& x) const { return true; }
+
+  /**
+   * @brief This method tests this \p pinned_allocator for inequality
+   *  to another.
+   *
+   *  @param x The other \p pinned_allocator of interest.
+   *  @return This method always returns \c false.
+   */
+  __host__ __device__ inline bool operator!=(pinned_allocator const& x) const
+  {
+    return !operator==(x);
+  }
+};
+}  // namespace cudf::detail
diff --git a/cpp/include/cudf/utilities/span.hpp b/cpp/include/cudf/utilities/span.hpp
index dcb9786bbd2..074e8d25bf7 100644
--- a/cpp/include/cudf/utilities/span.hpp
+++ b/cpp/include/cudf/utilities/span.hpp
@@ -226,7 +226,7 @@ struct host_span : public cudf::detail::span_base<T, Extent, host_span<T, Extent
                      std::is_convertible_v<std::remove_pointer_t<decltype(thrust::raw_pointer_cast(
                                              std::declval<C&>().data()))> (*)[],
                                            T (*)[]>>* = nullptr>
-  constexpr host_span(C& in) : base(in.data(), in.size())
+  constexpr host_span(C& in) : base(thrust::raw_pointer_cast(in.data()), in.size())
   {
   }
 
@@ -239,7 +239,7 @@ struct host_span : public cudf::detail::span_base<T, Extent, host_span<T, Extent
                      std::is_convertible_v<std::remove_pointer_t<decltype(thrust::raw_pointer_cast(
                                              std::declval<C&>().data()))> (*)[],
                                            T (*)[]>>* = nullptr>
-  constexpr host_span(C const& in) : base(in.data(), in.size())
+  constexpr host_span(C const& in) : base(thrust::raw_pointer_cast(in.data()), in.size())
   {
   }
 
diff --git a/cpp/src/io/text/bgzip_data_chunk_source.cu b/cpp/src/io/text/bgzip_data_chunk_source.cu
index 7f1f6688bec..22955deeabb 100644
--- a/cpp/src/io/text/bgzip_data_chunk_source.cu
+++ b/cpp/src/io/text/bgzip_data_chunk_source.cu
@@ -19,6 +19,7 @@
 #include "io/utilities/config_utils.hpp"
 
 #include <cudf/detail/nvtx/ranges.hpp>
+#include <cudf/detail/utilities/pinned_allocator.hpp>
 #include <cudf/io/text/data_chunk_source_factories.hpp>
 #include <cudf/io/text/detail/bgzip_utils.hpp>
 #include <cudf/utilities/default_stream.hpp>
@@ -30,7 +31,6 @@
 
 #include <thrust/host_vector.h>
 #include <thrust/iterator/zip_iterator.h>
-#include <thrust/system/cuda/experimental/pinned_allocator.h>
 #include <thrust/transform.h>
 
 #include <fstream>
@@ -65,8 +65,7 @@ struct bgzip_nvcomp_transform_functor {
 class bgzip_data_chunk_reader : public data_chunk_reader {
  private:
   template <typename T>
-  using pinned_host_vector =
-    thrust::host_vector<T, thrust::system::cuda::experimental::pinned_allocator<T>>;
+  using pinned_host_vector = thrust::host_vector<T, cudf::detail::pinned_allocator<T>>;
 
   template <typename T>
   static void copy_to_device(const pinned_host_vector<T>& host,
diff --git a/cpp/src/io/text/data_chunk_source_factories.cpp b/cpp/src/io/text/data_chunk_source_factories.cpp
index b910037c5d2..c09e7be507f 100644
--- a/cpp/src/io/text/data_chunk_source_factories.cpp
+++ b/cpp/src/io/text/data_chunk_source_factories.cpp
@@ -17,12 +17,12 @@
 #include "io/text/device_data_chunks.hpp"
 
 #include <cudf/detail/nvtx/ranges.hpp>
+#include <cudf/detail/utilities/pinned_allocator.hpp>
 #include <cudf/io/text/data_chunk_source_factories.hpp>
 
 #include <rmm/device_buffer.hpp>
 
 #include <thrust/host_vector.h>
-#include <thrust/system/cuda/experimental/pinned_allocator.h>
 
 #include <fstream>
 
@@ -37,7 +37,7 @@ namespace {
 class datasource_chunk_reader : public data_chunk_reader {
   struct host_ticket {
     cudaEvent_t event;
-    thrust::host_vector<char, thrust::system::cuda::experimental::pinned_allocator<char>> buffer;
+    thrust::host_vector<char, cudf::detail::pinned_allocator<char>> buffer;
   };
 
   constexpr static int num_tickets = 2;
@@ -117,7 +117,7 @@ class datasource_chunk_reader : public data_chunk_reader {
 class istream_data_chunk_reader : public data_chunk_reader {
   struct host_ticket {
     cudaEvent_t event;
-    thrust::host_vector<char, thrust::system::cuda::experimental::pinned_allocator<char>> buffer;
+    thrust::host_vector<char, cudf::detail::pinned_allocator<char>> buffer;
   };
 
   constexpr static int num_tickets = 2;
diff --git a/cpp/src/io/utilities/hostdevice_vector.hpp b/cpp/src/io/utilities/hostdevice_vector.hpp
index 6e34d862ed4..77dade24009 100644
--- a/cpp/src/io/utilities/hostdevice_vector.hpp
+++ b/cpp/src/io/utilities/hostdevice_vector.hpp
@@ -16,6 +16,7 @@
 
 #pragma once
 
+#include <cudf/detail/utilities/pinned_allocator.hpp>
 #include <cudf/utilities/default_stream.hpp>
 #include <cudf/utilities/error.hpp>
 #include <cudf/utilities/span.hpp>
@@ -24,7 +25,6 @@
 #include <rmm/device_buffer.hpp>
 
 #include <thrust/host_vector.h>
-#include <thrust/system/cuda/experimental/pinned_allocator.h>
 
 /**
  * @brief A helper class that wraps fixed-length device memory for the GPU, and
@@ -126,7 +126,7 @@ class hostdevice_vector {
   }
 
  private:
-  thrust::host_vector<T, thrust::system::cuda::experimental::pinned_allocator<T>> h_data;
+  thrust::host_vector<T, cudf::detail::pinned_allocator<T>> h_data;
   rmm::device_uvector<T> d_data;
 };
 

From 03034af5e0fdd2dd07fcd597045f28a7f8c4e7c3 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Tue, 1 Nov 2022 18:49:28 -0500
Subject: [PATCH 109/202] Standardize newlines at ends of files. (#12042)

This PR makes all files end with exactly one newline and enforces that rule with a pre-commit hook. The vast majority of files already comply with this rule; applying it to the remaining files improves consistency in the library's code style.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Vukasin Milovanovic (https://github.com/vuule)
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Mark Sadang (https://github.com/msadang)
  - Matthew Roeschke (https://github.com/mroeschke)
  - Robert Maynard (https://github.com/robertmaynard)
  - Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/12042
---
 .pre-commit-config.yaml                                   | 8 ++++++++
 cpp/benchmarks/reduction/rank.cpp                         | 2 +-
 cpp/include/cudf/detail/utilities/algorithm.cuh           | 2 +-
 cpp/include/cudf/detail/utilities/linked_column.hpp       | 2 +-
 cpp/include/cudf_test/detail/column_utilities.hpp         | 4 ++--
 cpp/src/text/subword/detail/codepoint_metadata.ah         | 1 -
 cpp/tests/strings/json_tests.cpp                          | 2 +-
 .../_templates/autosummary/class_with_autosummary.rst     | 2 +-
 .../_templates/autosummary/class_without_autosummary.rst  | 2 +-
 docs/cudf/source/api_docs/window.rst                      | 1 -
 docs/cudf/source/user_guide/cudf.CategoricalDtype.rst     | 2 +-
 docs/cudf/source/user_guide/cudf.Decimal128Dtype.rst      | 2 +-
 docs/cudf/source/user_guide/cudf.Decimal32Dtype.rst       | 2 +-
 docs/cudf/source/user_guide/cudf.Decimal64Dtype.rst       | 2 +-
 docs/cudf/source/user_guide/cudf.ListDtype.rst            | 2 +-
 docs/cudf/source/user_guide/cudf.StructDtype.rst          | 2 +-
 java/ci/README.md                                         | 1 -
 .../main/java/ai/rapids/cudf/DeviceMemoryBufferView.java  | 2 +-
 .../main/java/ai/rapids/cudf/ReplacePolicyWithColumn.java | 2 +-
 .../src/main/java/ai/rapids/cudf/StreamedTableReader.java | 2 +-
 java/src/main/java/ai/rapids/cudf/WindowOptions.java      | 2 +-
 java/src/main/native/.clang-format                        | 1 -
 java/src/main/native/src/dtype_utils.hpp                  | 4 ++--
 .../test/java/ai/rapids/cudf/PinnedMemoryPoolTest.java    | 2 +-
 python/cudf/cudf/_fuzz_testing/tests/readme.md            | 2 +-
 python/cudf/cudf/benchmarks/README.md                     | 2 --
 python/cudf_kafka/setup.cfg                               | 1 -
 27 files changed, 30 insertions(+), 29 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 2b52b040672..cadb19c395b 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,14 @@
 # Copyright (c) 2019-2022, NVIDIA CORPORATION.
 
 repos:
+      - repo: https://github.com/pre-commit/pre-commit-hooks
+        rev: v4.3.0
+        hooks:
+              - id: end-of-file-fixer
+                exclude: |
+                  (?x)^(
+                    ^python/cudf/cudf/tests/data/subword_tokenizer_data/.*
+                  )
       - repo: https://github.com/PyCQA/isort
         rev: 5.10.1
         hooks:
diff --git a/cpp/benchmarks/reduction/rank.cpp b/cpp/benchmarks/reduction/rank.cpp
index c20f728e018..5022e029d97 100644
--- a/cpp/benchmarks/reduction/rank.cpp
+++ b/cpp/benchmarks/reduction/rank.cpp
@@ -61,4 +61,4 @@ NVBENCH_BENCH_TYPES(nvbench_reduction_scan, NVBENCH_TYPE_AXES(data_type))
                     1000000,    // 1M
                     10000000,   // 10M
                     100000000,  // 100M
-                  });
\ No newline at end of file
+                  });
diff --git a/cpp/include/cudf/detail/utilities/algorithm.cuh b/cpp/include/cudf/detail/utilities/algorithm.cuh
index f05a09a8df1..4e83e219072 100644
--- a/cpp/include/cudf/detail/utilities/algorithm.cuh
+++ b/cpp/include/cudf/detail/utilities/algorithm.cuh
@@ -25,4 +25,4 @@ __device__ __forceinline__ T accumulate(Iterator first, Iterator last, T init, B
   }
   return init;
 }
-}  // namespace cudf::detail
\ No newline at end of file
+}  // namespace cudf::detail
diff --git a/cpp/include/cudf/detail/utilities/linked_column.hpp b/cpp/include/cudf/detail/utilities/linked_column.hpp
index 05b46cc8e13..059e32730e5 100644
--- a/cpp/include/cudf/detail/utilities/linked_column.hpp
+++ b/cpp/include/cudf/detail/utilities/linked_column.hpp
@@ -77,4 +77,4 @@ inline LinkedColVector table_to_linked_columns(table_view const& table)
   return LinkedColVector(linked_it, linked_it + table.num_columns());
 }
 
-}  // namespace cudf::detail
\ No newline at end of file
+}  // namespace cudf::detail
diff --git a/cpp/include/cudf_test/detail/column_utilities.hpp b/cpp/include/cudf_test/detail/column_utilities.hpp
index ddf3b658a86..f8270f61f10 100644
--- a/cpp/include/cudf_test/detail/column_utilities.hpp
+++ b/cpp/include/cudf_test/detail/column_utilities.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -82,4 +82,4 @@ std::vector<std::string> to_strings(cudf::column_view const& col, std::string co
 
 }  // namespace detail
 }  // namespace test
-}  // namespace cudf
\ No newline at end of file
+}  // namespace cudf
diff --git a/cpp/src/text/subword/detail/codepoint_metadata.ah b/cpp/src/text/subword/detail/codepoint_metadata.ah
index bc56d6c4ba5..794d14e4b6c 100644
--- a/cpp/src/text/subword/detail/codepoint_metadata.ah
+++ b/cpp/src/text/subword/detail/codepoint_metadata.ah
@@ -13464,4 +13464,3 @@ uint64_t aux_cp_data_119134_119232[] = {
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 0,0,0,0,0,0,0,0,0,0,511706698612736,511706698612736,511706698731886,511706698731886,511706698731887,511706698731887
 };
-
diff --git a/cpp/tests/strings/json_tests.cpp b/cpp/tests/strings/json_tests.cpp
index c533eed48df..983ccaebb44 100644
--- a/cpp/tests/strings/json_tests.cpp
+++ b/cpp/tests/strings/json_tests.cpp
@@ -1012,4 +1012,4 @@ TEST_F(JsonPathTests, MissingFieldsAsNulls)
   do_test("$.tup[*].array", "[[1,2],[3,4]]", "[[1,2],null,[3,4],null]");
   do_test("$.x[*].array", "", "null", false);
   do_test("$.tup[*].a.x", "[\"5\"]", "[null,null,null,\"5\"]");
-}
\ No newline at end of file
+}
diff --git a/docs/cudf/source/_templates/autosummary/class_with_autosummary.rst b/docs/cudf/source/_templates/autosummary/class_with_autosummary.rst
index f86822bc567..a9c9bd2b650 100644
--- a/docs/cudf/source/_templates/autosummary/class_with_autosummary.rst
+++ b/docs/cudf/source/_templates/autosummary/class_with_autosummary.rst
@@ -30,4 +30,4 @@
       {%- endfor %}
 
 {% endif %}
-{% endblock %}
\ No newline at end of file
+{% endblock %}
diff --git a/docs/cudf/source/_templates/autosummary/class_without_autosummary.rst b/docs/cudf/source/_templates/autosummary/class_without_autosummary.rst
index b57a7ceebb0..6676c672b20 100644
--- a/docs/cudf/source/_templates/autosummary/class_without_autosummary.rst
+++ b/docs/cudf/source/_templates/autosummary/class_without_autosummary.rst
@@ -3,4 +3,4 @@
 
 .. currentmodule:: {{ module }}
 
-.. autoclass:: {{ objname }}
\ No newline at end of file
+.. autoclass:: {{ objname }}
diff --git a/docs/cudf/source/api_docs/window.rst b/docs/cudf/source/api_docs/window.rst
index 36857cbde07..6d96cc3e4bf 100644
--- a/docs/cudf/source/api_docs/window.rst
+++ b/docs/cudf/source/api_docs/window.rst
@@ -23,4 +23,3 @@ Rolling window functions
    Rolling.min
    Rolling.max
    Rolling.apply
-
diff --git a/docs/cudf/source/user_guide/cudf.CategoricalDtype.rst b/docs/cudf/source/user_guide/cudf.CategoricalDtype.rst
index b43de86fc8f..1a8e709d938 100644
--- a/docs/cudf/source/user_guide/cudf.CategoricalDtype.rst
+++ b/docs/cudf/source/user_guide/cudf.CategoricalDtype.rst
@@ -16,4 +16,4 @@ cudf.CategoricalDtype
       CategoricalDtype.categories
       CategoricalDtype.ordered
       CategoricalDtype.from_pandas
-      CategoricalDtype.to_pandas
\ No newline at end of file
+      CategoricalDtype.to_pandas
diff --git a/docs/cudf/source/user_guide/cudf.Decimal128Dtype.rst b/docs/cudf/source/user_guide/cudf.Decimal128Dtype.rst
index 20f6aea1299..cd0990faa3f 100644
--- a/docs/cudf/source/user_guide/cudf.Decimal128Dtype.rst
+++ b/docs/cudf/source/user_guide/cudf.Decimal128Dtype.rst
@@ -17,4 +17,4 @@ cudf.Decimal128Dtype
       Decimal128Dtype.scale
       Decimal128Dtype.itemsize
       Decimal128Dtype.to_arrow
-      Decimal128Dtype.from_arrow
\ No newline at end of file
+      Decimal128Dtype.from_arrow
diff --git a/docs/cudf/source/user_guide/cudf.Decimal32Dtype.rst b/docs/cudf/source/user_guide/cudf.Decimal32Dtype.rst
index a92e695b4af..57067fdcf41 100644
--- a/docs/cudf/source/user_guide/cudf.Decimal32Dtype.rst
+++ b/docs/cudf/source/user_guide/cudf.Decimal32Dtype.rst
@@ -17,4 +17,4 @@ cudf.Decimal32Dtype
       Decimal32Dtype.scale
       Decimal32Dtype.itemsize
       Decimal32Dtype.to_arrow
-      Decimal32Dtype.from_arrow
\ No newline at end of file
+      Decimal32Dtype.from_arrow
diff --git a/docs/cudf/source/user_guide/cudf.Decimal64Dtype.rst b/docs/cudf/source/user_guide/cudf.Decimal64Dtype.rst
index 3982ec7ad80..c855bf9da2c 100644
--- a/docs/cudf/source/user_guide/cudf.Decimal64Dtype.rst
+++ b/docs/cudf/source/user_guide/cudf.Decimal64Dtype.rst
@@ -17,4 +17,4 @@ cudf.Decimal64Dtype
       Decimal64Dtype.scale
       Decimal64Dtype.itemsize
       Decimal64Dtype.to_arrow
-      Decimal64Dtype.from_arrow
\ No newline at end of file
+      Decimal64Dtype.from_arrow
diff --git a/docs/cudf/source/user_guide/cudf.ListDtype.rst b/docs/cudf/source/user_guide/cudf.ListDtype.rst
index 6f37317c898..e5a90ddd2ed 100644
--- a/docs/cudf/source/user_guide/cudf.ListDtype.rst
+++ b/docs/cudf/source/user_guide/cudf.ListDtype.rst
@@ -16,4 +16,4 @@ cudf.ListDtype
       ListDtype.element_type
       ListDtype.leaf_type
       ListDtype.from_arrow
-      ListDtype.to_arrow
\ No newline at end of file
+      ListDtype.to_arrow
diff --git a/docs/cudf/source/user_guide/cudf.StructDtype.rst b/docs/cudf/source/user_guide/cudf.StructDtype.rst
index 68fa71b8231..4ef23995de2 100644
--- a/docs/cudf/source/user_guide/cudf.StructDtype.rst
+++ b/docs/cudf/source/user_guide/cudf.StructDtype.rst
@@ -15,4 +15,4 @@ cudf.StructDtype
       
       StructDtype.fields
       StructDtype.from_arrow
-      StructDtype.to_arrow
\ No newline at end of file
+      StructDtype.to_arrow
diff --git a/java/ci/README.md b/java/ci/README.md
index 538e18c37c5..a26fb1cba04 100644
--- a/java/ci/README.md
+++ b/java/ci/README.md
@@ -48,4 +48,3 @@ scl enable devtoolset-9 "java/ci/build-in-docker.sh"
 ### The output
 
 You can find the cuDF jar in java/target/ like cudf-22.12.0-SNAPSHOT-cuda11.jar.
-
diff --git a/java/src/main/java/ai/rapids/cudf/DeviceMemoryBufferView.java b/java/src/main/java/ai/rapids/cudf/DeviceMemoryBufferView.java
index 6e87f55e1ee..e48b1cf59e4 100644
--- a/java/src/main/java/ai/rapids/cudf/DeviceMemoryBufferView.java
+++ b/java/src/main/java/ai/rapids/cudf/DeviceMemoryBufferView.java
@@ -36,4 +36,4 @@ public class DeviceMemoryBufferView extends BaseDeviceMemoryBuffer {
   public synchronized final DeviceMemoryBufferView slice(long offset, long len) {
     throw new UnsupportedOperationException("Slice on view is not supported");
   }
-}
\ No newline at end of file
+}
diff --git a/java/src/main/java/ai/rapids/cudf/ReplacePolicyWithColumn.java b/java/src/main/java/ai/rapids/cudf/ReplacePolicyWithColumn.java
index 5702f623ee1..d0a072aaa2c 100644
--- a/java/src/main/java/ai/rapids/cudf/ReplacePolicyWithColumn.java
+++ b/java/src/main/java/ai/rapids/cudf/ReplacePolicyWithColumn.java
@@ -43,4 +43,4 @@ public boolean equals(Object other) {
   public int hashCode() {
     return 31 * column + policy.hashCode();
   }
-}
\ No newline at end of file
+}
diff --git a/java/src/main/java/ai/rapids/cudf/StreamedTableReader.java b/java/src/main/java/ai/rapids/cudf/StreamedTableReader.java
index aae86116c9b..d845edc058d 100644
--- a/java/src/main/java/ai/rapids/cudf/StreamedTableReader.java
+++ b/java/src/main/java/ai/rapids/cudf/StreamedTableReader.java
@@ -39,4 +39,4 @@ public interface StreamedTableReader extends AutoCloseable {
 
     @Override
     void close() throws CudfException;
-}
\ No newline at end of file
+}
diff --git a/java/src/main/java/ai/rapids/cudf/WindowOptions.java b/java/src/main/java/ai/rapids/cudf/WindowOptions.java
index 6dd59e0f2fc..6ab5c0525ca 100644
--- a/java/src/main/java/ai/rapids/cudf/WindowOptions.java
+++ b/java/src/main/java/ai/rapids/cudf/WindowOptions.java
@@ -328,4 +328,4 @@ public void close() {
       followingCol.close();
     }
   }
-}
\ No newline at end of file
+}
diff --git a/java/src/main/native/.clang-format b/java/src/main/native/.clang-format
index 34ba71310cc..2b6bd9a27bb 100644
--- a/java/src/main/native/.clang-format
+++ b/java/src/main/native/.clang-format
@@ -202,4 +202,3 @@ Standard:        Cpp11
 TabWidth:        8
 UseTab:          Never
 ...
-
diff --git a/java/src/main/native/src/dtype_utils.hpp b/java/src/main/native/src/dtype_utils.hpp
index 53108ee7268..4de8a94182c 100644
--- a/java/src/main/native/src/dtype_utils.hpp
+++ b/java/src/main/native/src/dtype_utils.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -62,4 +62,4 @@ inline cudf::data_type make_data_type(jint out_dtype, jint scale) {
 }
 
 } // namespace jni
-} // namespace cudf
\ No newline at end of file
+} // namespace cudf
diff --git a/java/src/test/java/ai/rapids/cudf/PinnedMemoryPoolTest.java b/java/src/test/java/ai/rapids/cudf/PinnedMemoryPoolTest.java
index 66233c65362..16628d7be36 100644
--- a/java/src/test/java/ai/rapids/cudf/PinnedMemoryPoolTest.java
+++ b/java/src/test/java/ai/rapids/cudf/PinnedMemoryPoolTest.java
@@ -131,4 +131,4 @@ void testZeroSizedAllocation() {
     }
     assertEquals(poolSize, PinnedMemoryPool.getAvailableBytes());
   }
-}
\ No newline at end of file
+}
diff --git a/python/cudf/cudf/_fuzz_testing/tests/readme.md b/python/cudf/cudf/_fuzz_testing/tests/readme.md
index f3e02ad7ca5..3e30aa8c924 100644
--- a/python/cudf/cudf/_fuzz_testing/tests/readme.md
+++ b/python/cudf/cudf/_fuzz_testing/tests/readme.md
@@ -97,4 +97,4 @@ def set_rand_params(self, params):
         else:
             params_dict[param] = np.random.choice(values)
     self._current_params["test_kwargs"] = self.process_kwargs(params_dict)
-```
\ No newline at end of file
+```
diff --git a/python/cudf/cudf/benchmarks/README.md b/python/cudf/cudf/benchmarks/README.md
index 0d704f2a825..5486176550a 100644
--- a/python/cudf/cudf/benchmarks/README.md
+++ b/python/cudf/cudf/benchmarks/README.md
@@ -30,5 +30,3 @@ pytest --use_buffer True cudf/benchmarks/
 ```
 pytest --dataset_dir directory_path cudf/benchmarks/
 ```
-
-
diff --git a/python/cudf_kafka/setup.cfg b/python/cudf_kafka/setup.cfg
index 51469097526..f884e67908b 100644
--- a/python/cudf_kafka/setup.cfg
+++ b/python/cudf_kafka/setup.cfg
@@ -41,4 +41,3 @@ skip=
     build
     dist
     __init__.py
-

From a20bbfbc4251ce42f209fcbb928a5ebfb2e9b86b Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Wed, 2 Nov 2022 06:42:47 -0500
Subject: [PATCH 110/202] Trim trailing whitespace from all files. (#12041)

This PR trims trailing whitespace from all files and adds a pre-commit hook to enforce that rule going forward. The vast majority of files already comply with this rule; applying it to the remaining files improves consistency in the library's code style.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Vukasin Milovanovic (https://github.com/vuule)
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Mark Sadang (https://github.com/msadang)
  - Nghia Truong (https://github.com/ttnghia)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/12041
---
 .github/labeler.yml                           |   4 +-
 .github/workflows/add_to_project.yml          |   2 +-
 .pre-commit-config.yaml                       |   5 +
 CODE_OF_CONDUCT.md                            |   2 +-
 ci/cpu/build.sh                               |   2 +-
 ci/gpu/build.sh                               |   2 +-
 cpp/cmake/config.json                         |   2 +-
 .../thrust_faster_sort_compile_times.diff     |   1 -
 ...ust_transform_iter_with_reduce_by_key.diff |   4 +-
 cpp/doxygen/DoxygenLayout.xml                 |  10 +-
 cpp/doxygen/main_page.md                      |   4 +-
 cpp/doxygen/unicode.md                        |   2 +-
 cpp/scripts/run-cmake-format.sh               |   2 +-
 cpp/tests/copying/scatter_list_tests.cpp      | 186 +++++++++---------
 .../copying/segmented_gather_list_tests.cpp   |   8 +-
 .../lists/combine/concatenate_rows_tests.cpp  |  64 +++---
 cpp/tests/lists/sort_lists_tests.cpp          |   8 +-
 cpp/tests/rolling/nth_element_test.cpp        |  94 ++++-----
 cpp/tests/strings/json_tests.cpp              |  40 ++--
 .../integration/unary-transform-test.cpp      |   8 +-
 docs/cudf/README.md                           |   2 +-
 docs/cudf/source/api_docs/groupby.rst         |   2 +-
 docs/cudf/source/api_docs/index_objects.rst   |   2 +-
 docs/cudf/source/api_docs/series.rst          |   4 +-
 docs/cudf/source/api_docs/string_handling.rst |   1 -
 .../cudf/source/api_docs/subword_tokenize.rst |   2 +-
 .../source/developer_guide/benchmarking.md    |   2 +-
 .../developer_guide/contributing_guide.md     |   2 +-
 .../source/developer_guide/documentation.md   |   8 +-
 .../source/developer_guide/library_design.md  |  16 +-
 .../user_guide/cudf.CategoricalDtype.rst      |   6 +-
 .../user_guide/cudf.Decimal128Dtype.rst       |   6 +-
 .../source/user_guide/cudf.Decimal32Dtype.rst |   6 +-
 .../source/user_guide/cudf.Decimal64Dtype.rst |   6 +-
 .../cudf/source/user_guide/cudf.ListDtype.rst |   6 +-
 .../source/user_guide/cudf.StructDtype.rst    |   6 +-
 docs/cudf/source/user_guide/data-types.md     |   2 +-
 java/README.md                                |   6 +-
 .../java/ai/rapids/cudf/BinaryOperable.java   |   2 +-
 java/src/main/java/ai/rapids/cudf/Cuda.java   |  10 +-
 .../main/java/ai/rapids/cudf/NvtxRange.java   |   2 +-
 .../ai/rapids/cudf/OutOfBoundsPolicy.java     |   2 +-
 .../ai/rapids/cudf/RollingAggregation.java    |   2 +-
 java/src/main/native/.clang-format            |  14 +-
 .../test/java/ai/rapids/cudf/TableTest.java   |  18 +-
 .../cudf/TimestampColumnVectorTest.java       |   2 +-
 .../cudf/cudf/_fuzz_testing/tests/readme.md   |   6 +-
 python/custreamz/README.md                    |   2 +-
 48 files changed, 299 insertions(+), 296 deletions(-)

diff --git a/.github/labeler.yml b/.github/labeler.yml
index 41fd3802d55..3a868ac7d45 100644
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@@ -3,14 +3,14 @@
 cuDF (Python):
   - 'python/**'
   - 'notebooks/**'
-  
+
 libcudf:
   - 'cpp/**'
 
 CMake:
   - '**/CMakeLists.txt'
   - '**/cmake/**'
-  
+
 cuDF (Java):
   - 'java/**'
 
diff --git a/.github/workflows/add_to_project.yml b/.github/workflows/add_to_project.yml
index 60f9d1e88d7..b301c56a999 100644
--- a/.github/workflows/add_to_project.yml
+++ b/.github/workflows/add_to_project.yml
@@ -4,7 +4,7 @@ on:
   issues:
     types:
       - opened
-      
+
   pull_request_target:
     types:
       - opened
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index cadb19c395b..75d285f4f54 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -4,6 +4,11 @@ repos:
       - repo: https://github.com/pre-commit/pre-commit-hooks
         rev: v4.3.0
         hooks:
+              - id: trailing-whitespace
+                exclude: |
+                  (?x)^(
+                    ^python/cudf/cudf/tests/data/subword_tokenizer_data/.*
+                  )
               - id: end-of-file-fixer
                 exclude: |
                   (?x)^(
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
index 3029fbb41af..563581d270d 100644
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@@ -1 +1 @@
-This project has adopted the [Contributor Covenant Code of Conduct](https://docs.rapids.ai/resources/conduct/). 
+This project has adopted the [Contributor Covenant Code of Conduct](https://docs.rapids.ai/resources/conduct/).
diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh
index 400a1ba4764..ada69fe0923 100755
--- a/ci/cpu/build.sh
+++ b/ci/cpu/build.sh
@@ -130,7 +130,7 @@ if [ "$BUILD_CUDF" == '1' ]; then
 
   gpuci_logger "Build conda pkg for custreamz"
   gpuci_conda_retry mambabuild --croot ${CONDA_BLD_DIR} conda/recipes/custreamz --python=$PYTHON $CONDA_BUILD_ARGS $CONDA_CHANNEL
-  
+
   gpuci_logger "Build conda pkg for strings_udf"
   gpuci_conda_retry mambabuild --croot ${CONDA_BLD_DIR} conda/recipes/strings_udf --python=$PYTHON $CONDA_BUILD_ARGS $CONDA_CHANNEL
 
diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh
index fc020c4ca1e..4d86d19a7d2 100755
--- a/ci/gpu/build.sh
+++ b/ci/gpu/build.sh
@@ -197,7 +197,7 @@ else
     # copied by CI from the upstream 11.5 jobs into $CONDA_ARTIFACT_PATH
     gpuci_logger "Installing cudf, dask-cudf, cudf_kafka, and custreamz"
     gpuci_mamba_retry install cudf dask-cudf cudf_kafka custreamz -c "${CONDA_BLD_DIR}" -c "${CONDA_ARTIFACT_PATH}"
-    
+
     gpuci_logger "Check current conda environment"
     conda list --show-channel-urls
 
diff --git a/cpp/cmake/config.json b/cpp/cmake/config.json
index 4f287499503..f7d7b001856 100644
--- a/cpp/cmake/config.json
+++ b/cpp/cmake/config.json
@@ -9,7 +9,7 @@
           "VERSION": "?",
           "GIT_SHALLOW": "?",
           "OPTIONS": "*",
-          "FIND_PACKAGE_ARGUMENTS": "*" 
+          "FIND_PACKAGE_ARGUMENTS": "*"
         }
       },
       "ConfigureTest": {
diff --git a/cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times.diff b/cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times.diff
index e5d62e87ca4..864c89d4504 100644
--- a/cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times.diff
+++ b/cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times.diff
@@ -46,4 +46,3 @@ index 5d486789..b42fb5f0 100644
      for (int j = 1 & i; j < ITEMS_PER_THREAD - 1; j += 2)
      {
        if (compare_op(keys[j + 1], keys[j]))
-       
diff --git a/cpp/cmake/thirdparty/patches/thrust_transform_iter_with_reduce_by_key.diff b/cpp/cmake/thirdparty/patches/thrust_transform_iter_with_reduce_by_key.diff
index 035da3ef385..6a56af90d0d 100644
--- a/cpp/cmake/thirdparty/patches/thrust_transform_iter_with_reduce_by_key.diff
+++ b/cpp/cmake/thirdparty/patches/thrust_transform_iter_with_reduce_by_key.diff
@@ -5,7 +5,7 @@ index f512a36..a5f725d 100644
 @@ -102,6 +102,8 @@ template <typename InputFunction, typename OutputFunction, typename Iterator>
    /*! \endcond
     */
- 
+
 +  transform_input_output_iterator() = default;
 +
    /*! This constructor takes as argument a \c Iterator an \c InputFunction and an
@@ -18,7 +18,7 @@ index 66fb46a..4a68cb5 100644
 @@ -104,6 +104,8 @@ template <typename UnaryFunction, typename OutputIterator>
    /*! \endcond
     */
- 
+
 +  transform_output_iterator() = default;
 +
    /*! This constructor takes as argument an \c OutputIterator and an \c
diff --git a/cpp/doxygen/DoxygenLayout.xml b/cpp/doxygen/DoxygenLayout.xml
index a78a1cb701f..ded88dfe531 100644
--- a/cpp/doxygen/DoxygenLayout.xml
+++ b/cpp/doxygen/DoxygenLayout.xml
@@ -12,29 +12,29 @@
     </tab>
     <tab type="interfaces" visible="yes" title="">
       <tab type="interfacelist" visible="yes" title="" intro=""/>
-      <tab type="interfaceindex" visible="$ALPHABETICAL_INDEX" title=""/> 
+      <tab type="interfaceindex" visible="$ALPHABETICAL_INDEX" title=""/>
       <tab type="interfacehierarchy" visible="yes" title="" intro=""/>
     </tab>
     <tab type="classes" visible="yes" title="">
       <tab type="classlist" visible="yes" title="" intro=""/>
-      <tab type="classindex" visible="$ALPHABETICAL_INDEX" title=""/> 
+      <tab type="classindex" visible="$ALPHABETICAL_INDEX" title=""/>
       <tab type="hierarchy" visible="yes" title="" intro=""/>
       <tab type="classmembers" visible="yes" title="" intro=""/>
     </tab>
     <tab type="structs" visible="yes" title="">
       <tab type="structlist" visible="yes" title="" intro=""/>
-      <tab type="structindex" visible="$ALPHABETICAL_INDEX" title=""/> 
+      <tab type="structindex" visible="$ALPHABETICAL_INDEX" title=""/>
     </tab>
     <tab type="exceptions" visible="yes" title="">
       <tab type="exceptionlist" visible="yes" title="" intro=""/>
-      <tab type="exceptionindex" visible="$ALPHABETICAL_INDEX" title=""/> 
+      <tab type="exceptionindex" visible="$ALPHABETICAL_INDEX" title=""/>
       <tab type="exceptionhierarchy" visible="yes" title="" intro=""/>
     </tab>
     <tab type="files" visible="yes" title="">
       <tab type="filelist" visible="yes" title="" intro=""/>
       <tab type="globals" visible="yes" title="" intro=""/>
     </tab>
-    <tab type="examples" visible="yes" title="" intro=""/>  
+    <tab type="examples" visible="yes" title="" intro=""/>
   </navindex>
 
   <!-- Layout definition for a class page -->
diff --git a/cpp/doxygen/main_page.md b/cpp/doxygen/main_page.md
index 85b7888b066..308d10601af 100644
--- a/cpp/doxygen/main_page.md
+++ b/cpp/doxygen/main_page.md
@@ -1,5 +1,5 @@
-# libcudf 
+# libcudf
 
-libcudf is a C++ GPU DataFrame library for loading, joining, aggregating, filtering, and otherwise 
+libcudf is a C++ GPU DataFrame library for loading, joining, aggregating, filtering, and otherwise
 manipulating data. A GPU DataFrame is a column-oriented tabular data structure, so libcudf provides
 two core data structures: cudf::column, and cudf::table.
diff --git a/cpp/doxygen/unicode.md b/cpp/doxygen/unicode.md
index d20a18ba34c..1ab09e110c1 100644
--- a/cpp/doxygen/unicode.md
+++ b/cpp/doxygen/unicode.md
@@ -2,7 +2,7 @@
 
 The strings column currently supports only UTF-8 characters internally.
 For functions that require character testing (e.g. cudf::strings::all_characters_of_type()) or
-case conversion (e.g. cudf::strings::capitalize(), etc) only the 16-bit [Unicode 13.0](http://www.unicode.org/versions/Unicode13.0.0) 
+case conversion (e.g. cudf::strings::capitalize(), etc) only the 16-bit [Unicode 13.0](http://www.unicode.org/versions/Unicode13.0.0)
 character code-points (0-65535) values are supported.
 Case conversion and character testing on characters above code-point 65535 are not supported.
 
diff --git a/cpp/scripts/run-cmake-format.sh b/cpp/scripts/run-cmake-format.sh
index b9157c76492..f3e21779aa5 100755
--- a/cpp/scripts/run-cmake-format.sh
+++ b/cpp/scripts/run-cmake-format.sh
@@ -17,7 +17,7 @@
 # and exits gracefully if the file is not found. If a user wishes to specify a
 # config file at a nonstandard location, they may do so by setting the
 # environment variable RAPIDS_CMAKE_FORMAT_FILE.
-# 
+#
 # This script can be invoked directly anywhere within the project repository.
 # Alternatively, it may be invoked as a pre-commit hook via
 # `pre-commit run (cmake-format)|(cmake-lint)`.
diff --git a/cpp/tests/copying/scatter_list_tests.cpp b/cpp/tests/copying/scatter_list_tests.cpp
index d262cbccd61..82e2366d0b9 100644
--- a/cpp/tests/copying/scatter_list_tests.cpp
+++ b/cpp/tests/copying/scatter_list_tests.cpp
@@ -506,12 +506,12 @@ TYPED_TEST(TypedScatterListsTest, ListsOfStructs)
 
   // clang-format off
   auto source_numerics = numerics_column{
-    9, 9, 9, 9, 
+    9, 9, 9, 9,
     8, 8, 8
   };
 
   auto source_strings = strings_column_wrapper{
-    "nine", "nine", "nine", "nine", 
+    "nine", "nine", "nine", "nine",
     "eight", "eight", "eight"
   };
   // clang-format on
@@ -523,20 +523,20 @@ TYPED_TEST(TypedScatterListsTest, ListsOfStructs)
 
   // clang-format off
   auto target_ints    = numerics_column{
-    0, 0, 
-    1, 1, 
-    2, 2, 
-    3, 3, 
-    4, 4, 
+    0, 0,
+    1, 1,
+    2, 2,
+    3, 3,
+    4, 4,
     5, 5
   };
 
   auto target_strings = strings_column_wrapper{
-    "zero",  "zero", 
-    "one",   "one", 
-    "two",   "two", 
-    "three", "three", 
-    "four",  "four", 
+    "zero",  "zero",
+    "one",   "one",
+    "two",   "two",
+    "three", "three",
+    "four",  "four",
     "five",  "five"
   };
   // clang-format on
@@ -554,9 +554,9 @@ TYPED_TEST(TypedScatterListsTest, ListsOfStructs)
 
   // clang-format off
   auto expected_numerics = numerics_column{
-    8, 8, 8, 
-    1, 1, 
-    9, 9, 9, 9, 
+    8, 8, 8,
+    1, 1,
+    9, 9, 9, 9,
     3, 3, 4, 4, 5, 5
   };
 
@@ -587,18 +587,18 @@ TYPED_TEST(TypedScatterListsTest, ListsOfStructsWithNullMembers)
 
   // clang-format off
   auto source_numerics = numerics_column{
-    { 
-      9, 9, 9, 9, 
-      8, 8, 8    
-    }, 
+    {
+      9, 9, 9, 9,
+      8, 8, 8
+    },
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; })
   };
 
   auto source_strings = strings_column_wrapper{
     {
-      "nine",  "nine",  "nine", "nine", 
+      "nine",  "nine",  "nine", "nine",
       "eight", "eight", "eight"
-    }, 
+    },
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 5; })
   };
   // clang-format on
@@ -610,20 +610,20 @@ TYPED_TEST(TypedScatterListsTest, ListsOfStructsWithNullMembers)
 
   // clang-format off
   auto target_ints    = numerics_column{
-    0, 0, 
-    1, 1, 
-    2, 2, 
-    3, 3, 
-    4, 4, 
+    0, 0,
+    1, 1,
+    2, 2,
+    3, 3,
+    4, 4,
     5, 5
   };
 
   auto target_strings = strings_column_wrapper{
-    "zero", "zero", 
-    "one",  "one", 
-    "two",  "two", 
-    "three","three", 
-    "four", "four", 
+    "zero", "zero",
+    "one",  "one",
+    "two",  "two",
+    "three","three",
+    "four", "four",
     "five", "five"
   };
   // clang-format on
@@ -643,13 +643,13 @@ TYPED_TEST(TypedScatterListsTest, ListsOfStructsWithNullMembers)
   // clang-format off
   auto expected_numerics = numerics_column{
     {
-      8, 8, 8, 
-      1, 1, 
-      9, 9, 9, 9, 
-      3, 3, 
-      4, 4, 
+      8, 8, 8,
+      1, 1,
+      9, 9, 9, 9,
+      3, 3,
+      4, 4,
       5, 5
-    }, 
+    },
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 8; })
   };
 
@@ -684,17 +684,17 @@ TYPED_TEST(TypedScatterListsTest, ListsOfNullStructs)
   // clang-format off
   auto source_numerics = numerics_column{
     {
-      9, 9, 9, 9, 
+      9, 9, 9, 9,
       8, 8, 8
-    }, 
+    },
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; })
   };
 
   auto source_strings = strings_column_wrapper{
     {
-      "nine",  "nine",  "nine", "nine", 
+      "nine",  "nine",  "nine", "nine",
       "eight", "eight", "eight"
-    }, 
+    },
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 5; })
   };
   // clang-format on
@@ -708,20 +708,20 @@ TYPED_TEST(TypedScatterListsTest, ListsOfNullStructs)
 
   // clang-format off
   auto target_ints    = numerics_column{
-    0, 0, 
-    1, 1, 
-    2, 2, 
-    3, 3, 
-    4, 4, 
+    0, 0,
+    1, 1,
+    2, 2,
+    3, 3,
+    4, 4,
     5, 5
   };
 
   auto target_strings = strings_column_wrapper{
-    "zero",  "zero", 
-    "one",   "one", 
-    "two",   "two", 
-    "three", "three", 
-    "four",  "four", 
+    "zero",  "zero",
+    "one",   "one",
+    "two",   "two",
+    "three", "three",
+    "four",  "four",
     "five",  "five"
   };
   // clang-format on
@@ -740,13 +740,13 @@ TYPED_TEST(TypedScatterListsTest, ListsOfNullStructs)
   // clang-format off
   auto expected_numerics = numerics_column{
     {
-      8, 8, 8, 
-      1, 1, 
-      9, 9, 9, 9, 
-      3, 3, 
-      4, 4, 
+      8, 8, 8,
+      1, 1,
+      9, 9, 9, 9,
+      3, 3,
+      4, 4,
       5, 5
-    }, 
+    },
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i != 6) && (i != 8); })
   };
 
@@ -783,17 +783,17 @@ TYPED_TEST(TypedScatterListsTest, EmptyListsOfStructs)
   // clang-format off
   auto source_numerics = numerics_column{
     {
-      9, 9, 9, 9, 
+      9, 9, 9, 9,
       8, 8, 8
-    }, 
+    },
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; })
   };
 
   auto source_strings = strings_column_wrapper{
     {
-      "nine",  "nine",  "nine", "nine", 
+      "nine",  "nine",  "nine", "nine",
       "eight", "eight", "eight"
-    }, 
+    },
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 5; })
   };
   // clang-format on
@@ -807,20 +807,20 @@ TYPED_TEST(TypedScatterListsTest, EmptyListsOfStructs)
 
   // clang-format off
   auto target_ints    = numerics_column{
-    0, 0, 
-    1, 1, 
-    2, 2, 
-    3, 3, 
-    4, 4, 
+    0, 0,
+    1, 1,
+    2, 2,
+    3, 3,
+    4, 4,
     5, 5
   };
 
   auto target_strings = strings_column_wrapper{
-    "zero",  "zero", 
-    "one",   "one", 
-    "two",   "two", 
-    "three", "three", 
-    "four",  "four", 
+    "zero",  "zero",
+    "one",   "one",
+    "two",   "two",
+    "three", "three",
+    "four",  "four",
     "five",  "five"
   };
   // clang-format on
@@ -839,10 +839,10 @@ TYPED_TEST(TypedScatterListsTest, EmptyListsOfStructs)
   // clang-format off
   auto expected_numerics = numerics_column{
     {
-      8, 8, 8, 
-      1, 1, 
-      9, 9, 9, 9, 
-      3, 3, 
+      8, 8, 8,
+      1, 1,
+      9, 9, 9, 9,
+      3, 3,
       5, 5
     },
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i != 6) && (i != 8); })
@@ -880,17 +880,17 @@ TYPED_TEST(TypedScatterListsTest, NullListsOfStructs)
   // clang-format off
   auto source_numerics = numerics_column{
     {
-      9, 9, 9, 9, 
+      9, 9, 9, 9,
       8, 8, 8
-    }, 
+    },
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 3; })
   };
 
   auto source_strings = strings_column_wrapper{
     {
-      "nine",  "nine",  "nine", "nine", 
+      "nine",  "nine",  "nine", "nine",
       "eight", "eight", "eight"
-    }, 
+    },
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 5; })
   };
   // clang-format on
@@ -911,19 +911,19 @@ TYPED_TEST(TypedScatterListsTest, NullListsOfStructs)
 
   // clang-format off
   auto target_ints    = numerics_column{
-    0, 0, 
-    1, 1, 
-    2, 2, 
-    3, 3, 
-    4, 4, 
+    0, 0,
+    1, 1,
+    2, 2,
+    3, 3,
+    4, 4,
     5, 5
   };
   auto target_strings = strings_column_wrapper{
-    "zero",  "zero", 
-    "one",   "one", 
-    "two",   "two", 
-    "three", "three", 
-    "four",  "four", 
+    "zero",  "zero",
+    "one",   "one",
+    "two",   "two",
+    "three", "three",
+    "four",  "four",
     "five",  "five"
   };
   // clang-format on
@@ -942,10 +942,10 @@ TYPED_TEST(TypedScatterListsTest, NullListsOfStructs)
   // clang-format off
   auto expected_numerics = numerics_column{
     {
-      8, 8, 8, 
-      1, 1, 
-      9, 9, 9, 9, 
-      3, 3, 
+      8, 8, 8,
+      1, 1,
+      9, 9, 9, 9,
+      3, 3,
       5, 5
     },
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return (i != 6) && (i != 8); })
diff --git a/cpp/tests/copying/segmented_gather_list_tests.cpp b/cpp/tests/copying/segmented_gather_list_tests.cpp
index 180125b7880..c05db05d57c 100644
--- a/cpp/tests/copying/segmented_gather_list_tests.cpp
+++ b/cpp/tests/copying/segmented_gather_list_tests.cpp
@@ -159,11 +159,11 @@ TYPED_TEST(SegmentedGatherTest, GatherNested)
                                    {{15, 16}, {17, 18}, {17, 18}, {17, 18}, {-17, -18}}};
     auto const gather_map = LCW<int>{{0, -2, -2}, {1}, {1, 0, -1, -5}};
     auto const results    = segmented_gather(lists_column_view{list}, lists_column_view{gather_map});
-    auto const expected   = LCW<T>{{{2, 3}, {2, 3}, {2, 3}}, 
-                                   {{9, 10, 11}}, 
+    auto const expected   = LCW<T>{{{2, 3}, {2, 3}, {2, 3}},
+                                   {{9, 10, 11}},
                                    {{17, 18}, {15, 16}, {-17, -18}, {15, 16}}};
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected);
-    // clang-format on 
+    // clang-format on
   }
 
   // List<List<T>>, with out-of-bounds gather indices.
@@ -286,7 +286,7 @@ TYPED_TEST(SegmentedGatherTest, GatherNegatives)
                                    {{6, 7, 8}, {9, 10, 11}, {12, 13, 14}},
                                    {{15, 16}, {17, 18}, {17, 18}, {17, 18}, {17, 18}}};
     auto const gather_map = LCW<int>{{-1, 0}, {-2, -1, -4}, {-6, -4, -3, -2, -1, 0}};
-    auto const results    = 
+    auto const results    =
       segmented_gather(lists_column_view{list}, lists_column_view{gather_map}, NULLIFY);
     auto const expected   = LCW<T>{{{4, 5}, {2, 3}},
                                    {{{9, 10, 11}, {12, 13, 14}, LCW<T>{}}, null_at(2)},
diff --git a/cpp/tests/lists/combine/concatenate_rows_tests.cpp b/cpp/tests/lists/combine/concatenate_rows_tests.cpp
index ed8bf8abb8d..2f268ff9996 100644
--- a/cpp/tests/lists/combine/concatenate_rows_tests.cpp
+++ b/cpp/tests/lists/combine/concatenate_rows_tests.cpp
@@ -547,9 +547,9 @@ TEST_F(ListConcatenateRowsNestedTypesTest, ListWithNulls)
   // list<list<string>>
 
   // clang-format off
-  
-  // col 0  
-  cudf::test::lists_column_wrapper<cudf::string_view> 
+
+  // col 0
+  cudf::test::lists_column_wrapper<cudf::string_view>
     l0{ {
           {{{"whee", "yay", "bananas"}, nulls_at({1})}, {}},
           {{}},
@@ -559,23 +559,23 @@ TEST_F(ListConcatenateRowsNestedTypesTest, ListWithNulls)
         }, nulls_at({3, 4}) };
 
   // col1
-  cudf::test::lists_column_wrapper<cudf::string_view> 
+  cudf::test::lists_column_wrapper<cudf::string_view>
     l1{ {
           {{}},
           {{"arg"}, {"mno", "ampere"}, {"gpu"}, {"def"}},
           {{{{"", "hhh"}, nulls_at({0})}, {"www"}},                               nulls_at({1})},
           {{"warp", "donuts", "parking"}, { "", "apply", "twelve", "mouse", "bbb"}, {"bbb", "pom"}, {}},
-          {{}} 
+          {{}}
         }, nulls_at({4}) };
 
   // col2
-  cudf::test::lists_column_wrapper<cudf::string_view> 
+  cudf::test::lists_column_wrapper<cudf::string_view>
     l2{ {
           {{"monitor", "sugar"}},
           {{"spurs", "garlic"}, {"onion", "shallot", "carrot"}},
           {{"cars", "trucks", "planes"}, {"abc"}, {"mno", "pqr"}},
           {{}, {"ram", "cpu", "disk"}, {}},
-          {{"round"}, {"square"}} 
+          {{"round"}, {"square"}}
         }, nulls_at({0, 4}) };
 
   // concatenate_policy::IGNORE_NULLS
@@ -584,39 +584,39 @@ TEST_F(ListConcatenateRowsNestedTypesTest, ListWithNulls)
     cudf::table_view t({l0, l1, l2});
     auto result = cudf::lists::concatenate_rows(t, cudf::lists::concatenate_null_policy::IGNORE);
 
-    // expected  
+    // expected
     cudf::test::lists_column_wrapper<cudf::string_view>
-      expected{ {        
+      expected{ {
                   {{{"whee", "yay", "bananas"}, nulls_at({1})}, {}, {}},
                   {{}, {"arg"}, {"mno", "ampere"}, {"gpu"}, {"def"}, {"spurs", "garlic"}, {"onion", "shallot", "carrot"}},
-                  {{{"abc"}, {"def", "g", "xyw", "ijk"}, {"x", "y", "", "column"}, 
-                    {{"", "hhh"}, nulls_at({0})}, {"www"}, {"cars", "trucks", "planes"}, {"abc"}, {"mno", "pqr"}},                           
+                  {{{"abc"}, {"def", "g", "xyw", "ijk"}, {"x", "y", "", "column"},
+                    {{"", "hhh"}, nulls_at({0})}, {"www"}, {"cars", "trucks", "planes"}, {"abc"}, {"mno", "pqr"}},
                       nulls_at({0, 2, 4}) },
                   {{"warp", "donuts", "parking"}, { "", "apply", "twelve", "mouse", "bbb"}, {"bbb", "pom"}, {}, {}, {"ram", "cpu", "disk"}, {}},
                   {{}}
                 }, nulls_at({4}) };
-        
+
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected);
   }
-  
+
   // concatenate_policy::NULLIFY_OUTPUT_ROW
   {
     // perform the concatenate
     cudf::table_view t({l0, l1, l2});
     auto result = cudf::lists::concatenate_rows(t, cudf::lists::concatenate_null_policy::NULLIFY_OUTPUT_ROW);
 
-    // expected  
+    // expected
     cudf::test::lists_column_wrapper<cudf::string_view>
-      expected{ {        
+      expected{ {
                   {{}},
                   {{}, {"arg"}, {"mno", "ampere"}, {"gpu"}, {"def"}, {"spurs", "garlic"}, {"onion", "shallot", "carrot"}},
-                  {{{"abc"}, {"def", "g", "xyw", "ijk"}, {"x", "y", "", "column"}, 
-                    {{"", "hhh"}, nulls_at({0})}, {"www"}, {"cars", "trucks", "planes"}, {"abc"}, {"mno", "pqr"}},                           
+                  {{{"abc"}, {"def", "g", "xyw", "ijk"}, {"x", "y", "", "column"},
+                    {{"", "hhh"}, nulls_at({0})}, {"www"}, {"cars", "trucks", "planes"}, {"abc"}, {"mno", "pqr"}},
                       nulls_at({0, 2, 4}) },
                   {{}},
                   {{}}
-                }, nulls_at({0, 3, 4}) };    
-        
+                }, nulls_at({0, 3, 4}) };
+
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected);
   }
 
@@ -628,9 +628,9 @@ TEST_F(ListConcatenateRowsNestedTypesTest, ListWithNullsSliced)
   // list<list<string>>
 
   // clang-format off
-  
-  // col 0  
-  cudf::test::lists_column_wrapper<cudf::string_view> 
+
+  // col 0
+  cudf::test::lists_column_wrapper<cudf::string_view>
     unsliced_l0{ {
           {{{"whee", "yay", "bananas"}, nulls_at({1})}, {}},
           {{}},
@@ -641,13 +641,13 @@ TEST_F(ListConcatenateRowsNestedTypesTest, ListWithNullsSliced)
   auto l0 = cudf::split(unsliced_l0, {2})[1];
 
   // col1
-  cudf::test::lists_column_wrapper<cudf::string_view> 
+  cudf::test::lists_column_wrapper<cudf::string_view>
     unsliced_l1{ {
           {{}},
           {{"arg"}, {"mno", "ampere"}, {"gpu"}, {"def"}},
           {{{{"", "hhh"}, nulls_at({0})}, {"www"}},                               nulls_at({1})},
           {{"warp", "donuts", "parking"}, { "", "apply", "twelve", "mouse", "bbb"}, {"bbb", "pom"}, {}},
-          {{}} 
+          {{}}
         }, nulls_at({4}) };
   auto l1 = cudf::split(unsliced_l1, {2})[1];
 
@@ -657,14 +657,14 @@ TEST_F(ListConcatenateRowsNestedTypesTest, ListWithNullsSliced)
     cudf::table_view t({l0, l1});
     auto result = cudf::lists::concatenate_rows(t, cudf::lists::concatenate_null_policy::IGNORE);
 
-    // expected  
+    // expected
     cudf::test::lists_column_wrapper<cudf::string_view>
-      expected{ { {{{"abc"}, {"def", "g", "xyw", "ijk"}, {"x", "y", "", "column"}, 
+      expected{ { {{{"abc"}, {"def", "g", "xyw", "ijk"}, {"x", "y", "", "column"},
                     {{"", "hhh"}, nulls_at({0})}, {"www"}},                           nulls_at({0, 2, 4}) },
                   {{"warp", "donuts", "parking"}, { "", "apply", "twelve", "mouse", "bbb"}, {"bbb", "pom"}, {}},
                   {{}}
                 }, nulls_at({2}) };
-        
+
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected);
   }
 
@@ -674,14 +674,14 @@ TEST_F(ListConcatenateRowsNestedTypesTest, ListWithNullsSliced)
     cudf::table_view t({l0, l1});
     auto result = cudf::lists::concatenate_rows(t, cudf::lists::concatenate_null_policy::NULLIFY_OUTPUT_ROW);
 
-    // expected  
+    // expected
     cudf::test::lists_column_wrapper<cudf::string_view>
-      expected{ { {{{"abc"}, {"def", "g", "xyw", "ijk"}, {"x", "y", "", "column"}, 
+      expected{ { {{{"abc"}, {"def", "g", "xyw", "ijk"}, {"x", "y", "", "column"},
                     {{"", "hhh"}, nulls_at({0})}, {"www"}},                           nulls_at({0, 2, 4}) },
                   {{}},
-                  {{}} 
-                }, nulls_at({1, 2}) };    
-        
+                  {{}}
+                }, nulls_at({1, 2}) };
+
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected);
   }
 
diff --git a/cpp/tests/lists/sort_lists_tests.cpp b/cpp/tests/lists/sort_lists_tests.cpp
index a26ae5c2f48..22c3ba581ea 100644
--- a/cpp/tests/lists/sort_lists_tests.cpp
+++ b/cpp/tests/lists/sort_lists_tests.cpp
@@ -260,12 +260,12 @@ TEST_F(SortListsDouble, InfinityAndNaN)
   {
     // clang-format off
     LCW input{0.0, -0.0, -NaN, -NaN, NaN, Inf, -Inf,
-               1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 
                1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0,
                1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0,
                1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0,
                1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0,
-               1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 
+               1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0,
+               1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0,
                1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0,
                1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0,
                1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0,
@@ -277,13 +277,13 @@ TEST_F(SortListsDouble, InfinityAndNaN)
                2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
                3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
                4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
-               5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
+               5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
                6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
                7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
                8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
                9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
               Inf, Inf, -NaN, -NaN, NaN, NaN, -NaN, -NaN};
-    // clang-format on          
+    // clang-format on
     auto [sorted_lists, stable_sorted_lists] =
       generate_sorted_lists(cudf::lists_column_view{input}, {}, {});
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(sorted_lists->view(), expected);
diff --git a/cpp/tests/rolling/nth_element_test.cpp b/cpp/tests/rolling/nth_element_test.cpp
index 93276abbbb2..313a434dfa4 100644
--- a/cpp/tests/rolling/nth_element_test.cpp
+++ b/cpp/tests/rolling/nth_element_test.cpp
@@ -250,7 +250,7 @@ TYPED_TEST(NthElementTypedTest, GroupedRollingWindow)
   auto const group_col = fwcw<int32_t>{0, 0, 0, 0, 0, 0,
                                        10, 10, 10, 10, 10, 10, 10,
                                        20};
-  auto const input_col = fwcw<T> {0, 1, 2, 3, 4, 5,           // Group 0 
+  auto const input_col = fwcw<T> {0, 1, 2, 3, 4, 5,           // Group 0
                                   10, 11, 12, 13, 14, 15, 16, // Group 10
                                   20};                        // Group 20
   // clang-format on
@@ -267,16 +267,16 @@ TYPED_TEST(NthElementTypedTest, GroupedRollingWindow)
                                                  20},                        // Group 20
                                                 no_nulls()});
     auto const last_element = tester.test_grouped_nth_element(-1);
-    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*last_element, 
-                                        fwcw<T>{{2, 3, 4, 5, 5, 5,           // Group 0 
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*last_element,
+                                        fwcw<T>{{2, 3, 4, 5, 5, 5,           // Group 0
                                                  12, 13, 14, 15, 16, 16, 16, // Group 10
-                                                 20},                        // Group 20 
+                                                 20},                        // Group 20
                                                 no_nulls()});
     auto const third_element = tester.test_grouped_nth_element(2);
-    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*third_element, 
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*third_element,
                                         fwcw<T>{{2, 2, 2, 3, 4, 5,           // Group 0
                                                  12, 12, 12, 13, 14, 15, 16, // Group 10
-                                                 X},                         // Group 20                     
+                                                 X},                         // Group 20
                                                 null_at(13)});
     auto const second_last_element = tester.test_grouped_nth_element(-2);
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*second_last_element,
@@ -292,27 +292,27 @@ TYPED_TEST(NthElementTypedTest, GroupedRollingWindow)
     auto const first_element = tester.test_grouped_nth_element(0);
     // clang-format off
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*first_element,
-                                        fwcw<T>{{X, 0, 1, 2, 3, X,         // Group 0 
+                                        fwcw<T>{{X, 0, 1, 2, 3, X,         // Group 0
                                                  X, 10, 11, 12, 13, 14, X, // Group 10
                                                  X},                       // Group 20
                                                 nulls_at({0, 5, 6, 12, 13})});
     auto const last_element = tester.test_grouped_nth_element(-1);
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*last_element,
-                                        fwcw<T>{{X, 2, 3, 4, 5, X,         // Group 0 
+                                        fwcw<T>{{X, 2, 3, 4, 5, X,         // Group 0
                                                  X, 12, 13, 14, 15, 16, X, // Group 10
-                                                 X},                       // Group 20 
+                                                 X},                       // Group 20
                                                 nulls_at({0, 5, 6, 12, 13})});
     auto const second_element = tester.test_grouped_nth_element(1);
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*second_element,
-                                        fwcw<T>{{X, 1, 2, 3, 4, X,         // Group 0 
+                                        fwcw<T>{{X, 1, 2, 3, 4, X,         // Group 0
                                                  X, 11, 12, 13, 14, 15, X, // Group 10
-                                                 X},                       // Group 20 
+                                                 X},                       // Group 20
                                                 nulls_at({0, 5, 6, 12, 13})});
     auto const second_last_element = tester.test_grouped_nth_element(-2);
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*second_last_element,
                                         fwcw<T>{{X, 1, 2, 3, 4, X,         // Group 0
                                                  X, 11, 12, 13, 14, 15, X, // Group 10
-                                                 X},                       // Group 20               
+                                                 X},                       // Group 20
                                                 nulls_at({0, 5, 6, 12, 13})});
     // clang-format on
   }
@@ -341,7 +341,7 @@ TYPED_TEST(NthElementTypedTest, GroupedRollingWindowExcludeNulls)
                                        10, 10, 10, 10, 10, 10, 10,
                                        20,
                                        30};
-  auto const input_col = fwcw<T> {{0, 1, X, 3, X, 5,         // Group 0 
+  auto const input_col = fwcw<T> {{0, 1, X, 3, X, 5,         // Group 0
                                    10, X, X, 13, 14, 15, 16, // Group 10
                                    20,                       // Group 20
                                    X},                       // Group 30
@@ -361,25 +361,25 @@ TYPED_TEST(NthElementTypedTest, GroupedRollingWindowExcludeNulls)
                                                  X},                         // Group 30
                                                 null_at(14)});
     auto const last_element = tester.test_grouped_nth_element(-1);
-    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*last_element, 
-                                        fwcw<T>{{1, 3, 3, 5, 5, 5,           // Group 0 
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*last_element,
+                                        fwcw<T>{{1, 3, 3, 5, 5, 5,           // Group 0
                                                  10, 13, 14, 15, 16, 16, 16, // Group 10
-                                                 20,                         // Group 20 
-                                                 X},                         // Group 30 
+                                                 20,                         // Group 20
+                                                 X},                         // Group 30
                                                 null_at(14)});
     auto const third_element = tester.test_grouped_nth_element(2);
-    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*third_element, 
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*third_element,
                                         fwcw<T>{{X, 3, 3, 5, X, X,          // Group 0
                                                  X, X, 14, 15, 15, 15, 16,  // Group 10
-                                                 X,                         // Group 20 
-                                                 X},                        // Group 30                     
+                                                 X,                         // Group 20
+                                                 X},                        // Group 30
                                                 nulls_at({0, 4, 5, 6, 7, 13, 14})});
     auto const second_last_element = tester.test_grouped_nth_element(-2);
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*second_last_element,
                                         fwcw<T>{{0, 1, 1, 3, 3, 3,          // Group 0
                                                  X, 10, 13, 14, 15, 15, 15, // Group 10
                                                  X,                         // Group 20
-                                                 X},                        // Group 30                     
+                                                 X},                        // Group 30
                                                 nulls_at({6, 13, 14})});
     // clang-format on
   }
@@ -389,30 +389,30 @@ TYPED_TEST(NthElementTypedTest, GroupedRollingWindowExcludeNulls)
     auto const first_element = tester.test_grouped_nth_element(0);
     // clang-format off
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*first_element,
-                                        fwcw<T>{{X, 0, 1, 3, 3, X,         // Group 0 
+                                        fwcw<T>{{X, 0, 1, 3, 3, X,         // Group 0
                                                  X, 10, 13, 13, 13, 14, X, // Group 10
                                                  X,                        // Group 20
                                                  X},                       // Group 30
                                                 nulls_at({0, 5, 6, 12, 13, 14})});
     auto const last_element = tester.test_grouped_nth_element(-1);
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*last_element,
-                                        fwcw<T>{{X, 1, 3, 3, 5, X,         // Group 0 
+                                        fwcw<T>{{X, 1, 3, 3, 5, X,         // Group 0
                                                  X, 10, 13, 14, 15, 16, X, // Group 10
-                                                 X,                        // Group 20 
+                                                 X,                        // Group 20
                                                  X},                       // Group 30
                                                 nulls_at({0, 5, 6, 12, 13, 14})});
     auto const second_element = tester.test_grouped_nth_element(1);
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*second_element,
-                                        fwcw<T>{{X, 1, 3, X, 5, X,       // Group 0 
+                                        fwcw<T>{{X, 1, 3, X, 5, X,       // Group 0
                                                  X, X, X, 14, 14, 15, X, // Group 10
-                                                 X,                      // Group 20 
+                                                 X,                      // Group 20
                                                  X},                     // Group 30
                                                 nulls_at({0, 3, 5, 6, 7, 8, 12, 13, 14})});
     auto const second_last_element = tester.test_grouped_nth_element(-2);
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*second_last_element,
                                         fwcw<T>{{X, 0, 1, X, 3, X,       // Group 0
                                                  X, X, X, 13, 14, 15, X, // Group 10
-                                                 X,                      // Group 20               
+                                                 X,                      // Group 20
                                                  X},                     // Group 30
                                                 nulls_at({0, 3, 5, 6, 7, 8, 12, 13, 14})});
     // clang-format on
@@ -526,11 +526,11 @@ TEST_F(NthElementTest, GroupedRollingWindowForStrings)
   auto constexpr X = "";  // Placeholder for null strings.
 
   // clang-format off
-  auto const group_col = fwcw<int32_t>{0, 0, 0, 0, 0, 0,  
-                                       10, 10, 10, 10, 10, 10, 10,  
+  auto const group_col = fwcw<int32_t>{0, 0, 0, 0, 0, 0,
+                                       10, 10, 10, 10, 10, 10, 10,
                                        20};
   auto const input_col = strings{{"", "1", "22", "333", "4444", X,          // Group 0
-                                  "10", "11", "12", "13", "14", "15", "16", // Group 10 
+                                  "10", "11", "12", "13", "14", "15", "16", // Group 10
                                   "20"},                                    // Group 20
                                  null_at(5)};
   // clang-format on
@@ -543,22 +543,22 @@ TEST_F(NthElementTest, GroupedRollingWindowForStrings)
     auto const first_element = tester.test_grouped_nth_element(0);
     // clang-format off
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(
-      *first_element, 
-      strings{{"", "", "", "1", "22", "333",             // Group 0 
+      *first_element,
+      strings{{"", "", "", "1", "22", "333",             // Group 0
                "10", "10", "10", "11", "12", "13", "14", // Group 10
-               "20"},                                    // Group 20 
+               "20"},                                    // Group 20
               no_nulls()});
     auto const last_element = tester.test_grouped_nth_element(-1);
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(
-      *last_element, 
-      strings{{"22", "333", "4444", X, X, X,             // Group 0 
+      *last_element,
+      strings{{"22", "333", "4444", X, X, X,             // Group 0
                "12", "13", "14", "15", "16", "16", "16", // Group 10
-               "20"},                                    // Group 20 
+               "20"},                                    // Group 20
               nulls_at({3, 4, 5})});
     auto const third_element = tester.test_grouped_nth_element(2);
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(
-      *third_element, 
-      strings{{"22", "22", "22", "333", "4444", X,       // Group 0 
+      *third_element,
+      strings{{"22", "22", "22", "333", "4444", X,       // Group 0
                "12", "12", "12", "13", "14", "15", "16", // Group 10
                X},                                       // Group 20
               nulls_at({5, 13})});
@@ -567,7 +567,7 @@ TEST_F(NthElementTest, GroupedRollingWindowForStrings)
       *second_last_element,
       strings{{"1", "22", "333", "4444", "4444", "4444", // Group 0
                "11", "12", "13", "14", "15", "15", "15", // Group 10
-               X},                                       // Group 20 
+               X},                                       // Group 20
               null_at(13)});
     // clang-format on
   }
@@ -578,30 +578,30 @@ TEST_F(NthElementTest, GroupedRollingWindowForStrings)
     // clang-format off
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(
       *first_element,
-      strings{{X, "", "1", "22", "333", X,         // Group 0 
+      strings{{X, "", "1", "22", "333", X,         // Group 0
                X, "10", "11", "12", "13", "14", X, // Group 10
-               X},                                 // Group 20 
+               X},                                 // Group 20
               nulls_at({0, 5, 6, 12, 13})});
     auto const last_element = tester.test_grouped_nth_element(-1);
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(
       *last_element,
-      strings{{X, "22", "333", "4444", X, X,       // Group 0 
+      strings{{X, "22", "333", "4444", X, X,       // Group 0
                X, "12", "13", "14", "15", "16", X, // Group 10
-               X},                                 // Group 20 
+               X},                                 // Group 20
               nulls_at({0, 4, 5, 6, 12, 13})});
     auto const second_element = tester.test_grouped_nth_element(1);
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(
       *second_element,
-      strings{{X, "1", "22", "333", "4444", X,     // Group 0 
+      strings{{X, "1", "22", "333", "4444", X,     // Group 0
                X, "11", "12", "13", "14", "15", X, // Group 10
-               X},                                 // Group 20 
+               X},                                 // Group 20
               nulls_at({0, 5, 6, 12, 13})});
     auto const second_last_element = tester.test_grouped_nth_element(-2);
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(
       *second_last_element,
-      strings{{X, "1", "22", "333", "4444", X,     // Group 0 
+      strings{{X, "1", "22", "333", "4444", X,     // Group 0
                X, "11", "12", "13", "14", "15", X, // Group 10
-               X},                                 // Group 20 
+               X},                                 // Group 20
               nulls_at({0, 5, 6, 12, 13})});
     // clang-format on
   }
diff --git a/cpp/tests/strings/json_tests.cpp b/cpp/tests/strings/json_tests.cpp
index 983ccaebb44..7f6a6422d18 100644
--- a/cpp/tests/strings/json_tests.cpp
+++ b/cpp/tests/strings/json_tests.cpp
@@ -26,7 +26,7 @@
 
 // clang-format off
 std::string json_string{
-  "{" 
+  "{"
     "\"store\": {""\"book\": ["
         "{"
           "\"category\": \"reference\","
@@ -101,7 +101,7 @@ TEST_F(JsonPathTests, GetJsonObjectChildOp)
     auto result     = drop_whitespace(*result_raw);
 
     // clang-format off
-    cudf::test::strings_column_wrapper expected_raw{     
+    cudf::test::strings_column_wrapper expected_raw{
       "{"
         "\"book\": ["
           "{"
@@ -246,7 +246,7 @@ TEST_F(JsonPathTests, GetJsonObjectWildcardOp)
 
     // clang-format off
     cudf::test::strings_column_wrapper expected_raw{
-      "[" 
+      "["
         "{"
           "\"book\": ["
             "{"
@@ -676,7 +676,7 @@ TEST_F(JsonPathTests, MixedOutput)
          "\"z\": {\"i\": 10, \"j\": 100},"
          "\"b\": [\"c\",null,true,-1]"
       "}"
-      }, 
+      },
       {1, 1, 0, 1, 1, 1});
     // clang-format on
 
@@ -708,11 +708,11 @@ TEST_F(JsonPathTests, MixedOutput)
 
     // clang-format off
     cudf::test::strings_column_wrapper expected({
-      "c", 
-      "c", 
-      "", 
-      "", 
-      "", 
+      "c",
+      "c",
+      "",
+      "",
+      "",
       "[\"c\",null,true,-1]"},
       {1, 1, 0, 0, 0, 1});
     // clang-format on
@@ -726,11 +726,11 @@ TEST_F(JsonPathTests, MixedOutput)
 
     // clang-format off
     cudf::test::strings_column_wrapper expected({
-      "[\"c\"]", 
-      "[\"c\"]", 
-      "", 
-      "[\"y\",500]", 
-      "[]", 
+      "[\"c\"]",
+      "[\"c\"]",
+      "",
+      "[\"y\",500]",
+      "[]",
       "["
         "{\"i\": 10, \"j\": 100},"
         "[\"c\",null,true,-1]"
@@ -747,11 +747,11 @@ TEST_F(JsonPathTests, MixedOutput)
 
     // clang-format off
     cudf::test::strings_column_wrapper expected({
-      "[]", 
-      "[]", 
-      "", 
+      "[]",
+      "[]",
+      "",
+      "",
       "",
-      "",      
       "[\"c\",null,true,-1]"},
       {1, 1, 0, 0, 0, 1});
     // clang-format on
@@ -835,7 +835,7 @@ TEST_F(JsonPathTests, AllowSingleQuotes)
                 "\'b\': [\'c\',null,true,-1]"
               "}"
     "}",
-    
+
     "{"
       "\'a\': \"abc'def\""
     "}",
@@ -869,7 +869,7 @@ TEST_F(JsonPathTests, AllowSingleQuotes)
       "}",
       "abc'def",
       "'abc'def'"
-      }, 
+      },
       {1, 1, 0, 1, 1, 1, 1, 1});
     // clang-format on
 
diff --git a/cpp/tests/transform/integration/unary-transform-test.cpp b/cpp/tests/transform/integration/unary-transform-test.cpp
index 3f9088f9b4f..f935c83bc9c 100644
--- a/cpp/tests/transform/integration/unary-transform-test.cpp
+++ b/cpp/tests/transform/integration/unary-transform-test.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  * Copyright 2018-2019 BlazingDB, Inc.
  *     Copyright 2018 Christian Noboa Mardini <christian@blazingdb.com>
@@ -153,13 +153,13 @@ TEST_F(UnaryOperationIntegrationTest, Transform_INT8_INT8)
   const char cuda[] =
     R"***(
 __device__ inline void f(
-  signed char* output, 
+  signed char* output,
   signed char input
 ){
-	if(input > 96 && input < 123){	
+	if(input > 96 && input < 123){
   	*output = input - 32;
   }else{
-  	*output = input;    
+  	*output = input;
   }
 }
 )***";
diff --git a/docs/cudf/README.md b/docs/cudf/README.md
index 6d07ec561bf..004f1998966 100644
--- a/docs/cudf/README.md
+++ b/docs/cudf/README.md
@@ -2,5 +2,5 @@
 
 This directory contains the documentation of cuDF Python.
 For more information on how to write, build, and read the documentation,
-see 
+see
 [the developer documentation](https://github.com/rapidsai/cudf/blob/HEAD/docs/cudf/source/developer_guide/documentation.md).
diff --git a/docs/cudf/source/api_docs/groupby.rst b/docs/cudf/source/api_docs/groupby.rst
index f36951749fb..701676a1779 100644
--- a/docs/cudf/source/api_docs/groupby.rst
+++ b/docs/cudf/source/api_docs/groupby.rst
@@ -63,7 +63,7 @@ Computations / descriptive stats
    GroupBy.var
    GroupBy.corr
    GroupBy.cov
-   
+
 The following methods are available in both ``SeriesGroupBy`` and
 ``DataFrameGroupBy`` objects, but may differ slightly, usually in that
 the ``DataFrameGroupBy`` version usually permits the specification of an
diff --git a/docs/cudf/source/api_docs/index_objects.rst b/docs/cudf/source/api_docs/index_objects.rst
index 6edd15e7176..46b3e864e35 100644
--- a/docs/cudf/source/api_docs/index_objects.rst
+++ b/docs/cudf/source/api_docs/index_objects.rst
@@ -270,7 +270,7 @@ Time/date components
    DatetimeIndex.weekday
    DatetimeIndex.quarter
    DatetimeIndex.is_leap_year
-   
+
    DatetimeIndex.isocalendar
 
 Time-specific operations
diff --git a/docs/cudf/source/api_docs/series.rst b/docs/cudf/source/api_docs/series.rst
index 842319338b3..245793e5ea6 100644
--- a/docs/cudf/source/api_docs/series.rst
+++ b/docs/cudf/source/api_docs/series.rst
@@ -280,7 +280,7 @@ Datetime properties
    is_year_start
    is_year_end
    is_leap_year
-   days_in_month   
+   days_in_month
 
 Datetime methods
 ^^^^^^^^^^^^^^^^
@@ -303,7 +303,7 @@ Timedelta properties
    :toctree: api/
 
    days
-   seconds   
+   seconds
    microseconds
    nanoseconds
    components
diff --git a/docs/cudf/source/api_docs/string_handling.rst b/docs/cudf/source/api_docs/string_handling.rst
index 2285bb8fb7a..57a09dee8e2 100644
--- a/docs/cudf/source/api_docs/string_handling.rst
+++ b/docs/cudf/source/api_docs/string_handling.rst
@@ -95,4 +95,3 @@ strings and apply several methods to it. These can be accessed like
    url_encode
    wrap
    zfill
-   
diff --git a/docs/cudf/source/api_docs/subword_tokenize.rst b/docs/cudf/source/api_docs/subword_tokenize.rst
index e8737a9ee0a..fc814bcb92a 100644
--- a/docs/cudf/source/api_docs/subword_tokenize.rst
+++ b/docs/cudf/source/api_docs/subword_tokenize.rst
@@ -8,5 +8,5 @@ Constructor
 .. autosummary::
    :toctree: api/
    :template: autosummary/class_with_autosummary.rst
-   
+
    SubwordTokenizer
diff --git a/docs/cudf/source/developer_guide/benchmarking.md b/docs/cudf/source/developer_guide/benchmarking.md
index 9370bde8c1e..dd0e1aca0c5 100644
--- a/docs/cudf/source/developer_guide/benchmarking.md
+++ b/docs/cudf/source/developer_guide/benchmarking.md
@@ -93,7 +93,7 @@ To satisfy these requirements, one must follow these rules when writing benchmar
    This enables running the benchmarks in "test" mode on small datasets, which will be much faster.
 
 
-### Writing benchmarks 
+### Writing benchmarks
 
 Just as benchmarks should be written in terms of the highest level classes in the hierarchy,
 they should also assume as little as possible about the nature of the data.
diff --git a/docs/cudf/source/developer_guide/contributing_guide.md b/docs/cudf/source/developer_guide/contributing_guide.md
index 1126e5c110a..d78b576320e 100644
--- a/docs/cudf/source/developer_guide/contributing_guide.md
+++ b/docs/cudf/source/developer_guide/contributing_guide.md
@@ -15,7 +15,7 @@ Developers are strongly recommended to set up `pre-commit` prior to any developm
 The `.pre-commit-config.yaml` file at the root of the repo is the primary source of truth linting.
 Specifically, cuDF uses the following tools:
 
-- [`flake8`](https://github.com/pycqa/flake8) checks for general code formatting compliance. 
+- [`flake8`](https://github.com/pycqa/flake8) checks for general code formatting compliance.
 - [`black`](https://github.com/psf/black) is an automatic code formatter.
 - [`isort`](https://pycqa.github.io/isort/) ensures imports are sorted consistently.
 - [`mypy`](http://mypy-lang.org/) performs static type checking.
diff --git a/docs/cudf/source/developer_guide/documentation.md b/docs/cudf/source/developer_guide/documentation.md
index b2c66ed43c5..187934cd274 100644
--- a/docs/cudf/source/developer_guide/documentation.md
+++ b/docs/cudf/source/developer_guide/documentation.md
@@ -4,7 +4,7 @@ cuDF documentation is split into multiple pieces.
 All core functionality is documented using inline docstrings.
 Additional pages like user or developer guides are written independently.
 While docstrings are written using [reStructuredText](https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html) (reST),
-the latter are written using [MyST](https://myst-parser.readthedocs.io/en/latest/) 
+the latter are written using [MyST](https://myst-parser.readthedocs.io/en/latest/)
 The inline docstrings are organized using a small set of additional reST pages.
 The results are all then compiled together using [Sphinx](https://www.sphinx-doc.org/en/master/).
 This document discusses each of these components and how to contribute to them.
@@ -38,7 +38,7 @@ class A:
         ----------
         bar : str
             Description of bar.
-        
+
         Returns
         -------
         float
@@ -167,7 +167,7 @@ so links should make use of the appropriately namespaced anchors for links rathe
 The following are required to build the documentation:
 - A RAPIDS-compatible GPU. This is necessary because the documentation execute code.
 - A working copy of cudf in the same build environment.
-  We recommend following the [build instructions](https://github.com/rapidsai/cudf/blob/main/CONTRIBUTING.md#setting-up-your-build-environment). 
+  We recommend following the [build instructions](https://github.com/rapidsai/cudf/blob/main/CONTRIBUTING.md#setting-up-your-build-environment).
 - Sphinx, numpydoc, and MyST-NB.
   Assuming you follow the build instructions, these should automatically be installed into your environment.
 
@@ -190,7 +190,7 @@ Alternatively, you may specify a port with `python -m http.server $PORT`.
 You may build docs on a remote machine but want to view them locally.
 Assuming the other machine's IP address is visible on your local network,
 you can view the docs by replacing `localhost` with the IP address of the host machine.
-Alternatively, you may also forward the port using e.g. 
+Alternatively, you may also forward the port using e.g.
 `ssh -N -f -L localhost:$LOCAL_PORT:localhost:$REMOTE_PORT $REMOTE_IP`.
 That will make `$REMOTE_IP:$REMOTE_PORT` visible at `localhost:$LOCAL_PORT`.
 
diff --git a/docs/cudf/source/developer_guide/library_design.md b/docs/cudf/source/developer_guide/library_design.md
index 457ae6a39ff..2f0fb5d86fc 100644
--- a/docs/cudf/source/developer_guide/library_design.md
+++ b/docs/cudf/source/developer_guide/library_design.md
@@ -27,24 +27,24 @@ Finally we tie these pieces together to provide a more holistic view of the proj
 % class RangeIndex
 % class DataFrame
 % class Series
-% 
+%
 % Frame <|-- IndexedFrame
-% 
+%
 % Frame <|-- SingleColumnFrame
-% 
+%
 % SingleColumnFrame <|-- Series
 % IndexedFrame <|-- Series
-% 
+%
 % IndexedFrame <|-- DataFrame
-% 
+%
 % BaseIndex <|-- RangeIndex
-% 
+%
 % BaseIndex <|-- MultiIndex
 % Frame <|-- MultiIndex
-% 
+%
 % BaseIndex <|-- GenericIndex
 % SingleColumnFrame <|-- GenericIndex
-% 
+%
 % @enduml
 
 
diff --git a/docs/cudf/source/user_guide/cudf.CategoricalDtype.rst b/docs/cudf/source/user_guide/cudf.CategoricalDtype.rst
index 1a8e709d938..808c20e0750 100644
--- a/docs/cudf/source/user_guide/cudf.CategoricalDtype.rst
+++ b/docs/cudf/source/user_guide/cudf.CategoricalDtype.rst
@@ -6,13 +6,13 @@ cudf.CategoricalDtype
 .. autoclass:: CategoricalDtype
    :members: categories, ordered, from_pandas, to_pandas
 
-   
-   
+
+
 ..
    HACK -- the point here is that we don't want this to appear in the output, but the autosummary should still generate the pages.
    .. autosummary::
       :toctree:
-      
+
       CategoricalDtype.categories
       CategoricalDtype.ordered
       CategoricalDtype.from_pandas
diff --git a/docs/cudf/source/user_guide/cudf.Decimal128Dtype.rst b/docs/cudf/source/user_guide/cudf.Decimal128Dtype.rst
index cd0990faa3f..cada8fd6cb6 100644
--- a/docs/cudf/source/user_guide/cudf.Decimal128Dtype.rst
+++ b/docs/cudf/source/user_guide/cudf.Decimal128Dtype.rst
@@ -6,13 +6,13 @@ cudf.Decimal128Dtype
 .. autoclass:: Decimal128Dtype
    :members: precision, scale, itemsize, to_arrow, from_arrow
 
-   
-   
+
+
 ..
    HACK -- the point here is that we don't want this to appear in the output, but the autosummary should still generate the pages.
    .. autosummary::
       :toctree:
-      
+
       Decimal128Dtype.precision
       Decimal128Dtype.scale
       Decimal128Dtype.itemsize
diff --git a/docs/cudf/source/user_guide/cudf.Decimal32Dtype.rst b/docs/cudf/source/user_guide/cudf.Decimal32Dtype.rst
index 57067fdcf41..c4c65bb2d24 100644
--- a/docs/cudf/source/user_guide/cudf.Decimal32Dtype.rst
+++ b/docs/cudf/source/user_guide/cudf.Decimal32Dtype.rst
@@ -6,13 +6,13 @@ cudf.Decimal32Dtype
 .. autoclass:: Decimal32Dtype
    :members: precision, scale, itemsize, to_arrow, from_arrow
 
-   
-   
+
+
 ..
    HACK -- the point here is that we don't want this to appear in the output, but the autosummary should still generate the pages.
    .. autosummary::
       :toctree:
-      
+
       Decimal32Dtype.precision
       Decimal32Dtype.scale
       Decimal32Dtype.itemsize
diff --git a/docs/cudf/source/user_guide/cudf.Decimal64Dtype.rst b/docs/cudf/source/user_guide/cudf.Decimal64Dtype.rst
index c855bf9da2c..99305ade485 100644
--- a/docs/cudf/source/user_guide/cudf.Decimal64Dtype.rst
+++ b/docs/cudf/source/user_guide/cudf.Decimal64Dtype.rst
@@ -6,13 +6,13 @@ cudf.Decimal64Dtype
 .. autoclass:: Decimal64Dtype
    :members: precision, scale, itemsize, to_arrow, from_arrow
 
-   
-   
+
+
 ..
    HACK -- the point here is that we don't want this to appear in the output, but the autosummary should still generate the pages.
    .. autosummary::
       :toctree:
-      
+
       Decimal64Dtype.precision
       Decimal64Dtype.scale
       Decimal64Dtype.itemsize
diff --git a/docs/cudf/source/user_guide/cudf.ListDtype.rst b/docs/cudf/source/user_guide/cudf.ListDtype.rst
index e5a90ddd2ed..a9b5000e657 100644
--- a/docs/cudf/source/user_guide/cudf.ListDtype.rst
+++ b/docs/cudf/source/user_guide/cudf.ListDtype.rst
@@ -6,13 +6,13 @@ cudf.ListDtype
 .. autoclass:: ListDtype
    :members: element_type, leaf_type, from_arrow, to_arrow
 
-   
-   
+
+
 ..
    HACK -- the point here is that we don't want this to appear in the output, but the autosummary should still generate the pages.
    .. autosummary::
       :toctree:
-      
+
       ListDtype.element_type
       ListDtype.leaf_type
       ListDtype.from_arrow
diff --git a/docs/cudf/source/user_guide/cudf.StructDtype.rst b/docs/cudf/source/user_guide/cudf.StructDtype.rst
index 4ef23995de2..dd2a841dbe3 100644
--- a/docs/cudf/source/user_guide/cudf.StructDtype.rst
+++ b/docs/cudf/source/user_guide/cudf.StructDtype.rst
@@ -6,13 +6,13 @@ cudf.StructDtype
 .. autoclass:: StructDtype
    :members: fields, from_arrow, to_arrow
 
-   
-   
+
+
 ..
    HACK -- the point here is that we don't want this to appear in the output, but the autosummary should still generate the pages.
    .. autosummary::
       :toctree:
-      
+
       StructDtype.fields
       StructDtype.from_arrow
       StructDtype.to_arrow
diff --git a/docs/cudf/source/user_guide/data-types.md b/docs/cudf/source/user_guide/data-types.md
index 85152930a73..ee75457e87d 100644
--- a/docs/cudf/source/user_guide/data-types.md
+++ b/docs/cudf/source/user_guide/data-types.md
@@ -46,7 +46,7 @@ dtype: float32
 The data type associated with string data in cuDF is `"np.object"`.
 
 ```python
->>> import cudf 
+>>> import cudf
 >>> s = cudf.Series(["abc", "def", "ghi"])
 >>> s.dtype
 dtype("object")
diff --git a/java/README.md b/java/README.md
index 05a24c1d3d3..2d8e2190fee 100644
--- a/java/README.md
+++ b/java/README.md
@@ -34,7 +34,7 @@ most modern cuda drivers.
 </dependency>
 ```
 
-In some cases there may be a classifier to indicate the version of cuda required. See the 
+In some cases there may be a classifier to indicate the version of cuda required. See the
 [Build From Source](#build-from-source) section below for more information about when this
 can happen. No official release of the jar will have a classifier on it.
 
@@ -114,12 +114,12 @@ mvn clean install -DCUDF_USE_PER_THREAD_DEFAULT_STREAM=ON
 
 ## GPUDirect Storage (GDS)
 
-The JNI code can be built with *GPUDirect Storage* (GDS) support, which enables direct copying 
+The JNI code can be built with *GPUDirect Storage* (GDS) support, which enables direct copying
 between GPU device buffers and supported filesystems (see
 https://docs.nvidia.com/gpudirect-storage/).
 
 To enable GDS support, first make sure GDS is installed (see
-https://docs.nvidia.com/gpudirect-storage/troubleshooting-guide/index.html), then run:  
+https://docs.nvidia.com/gpudirect-storage/troubleshooting-guide/index.html), then run:
 ```shell script
 cd src/cudf/java
 mvn clean install -DUSE_GDS=ON
diff --git a/java/src/main/java/ai/rapids/cudf/BinaryOperable.java b/java/src/main/java/ai/rapids/cudf/BinaryOperable.java
index 2b1afb92e36..48a7861f1a1 100644
--- a/java/src/main/java/ai/rapids/cudf/BinaryOperable.java
+++ b/java/src/main/java/ai/rapids/cudf/BinaryOperable.java
@@ -383,7 +383,7 @@ default ColumnVector and(BinaryOperable rhs) {
   }
 
   /**
-   * Logical or (||) with the given output type. this || rhs  
+   * Logical or (||) with the given output type. this || rhs
    */
   default ColumnVector or(BinaryOperable rhs, DType outType) {
     return binaryOp(BinaryOp.LOGICAL_OR, rhs, outType);
diff --git a/java/src/main/java/ai/rapids/cudf/Cuda.java b/java/src/main/java/ai/rapids/cudf/Cuda.java
index 56a754279fc..e1298e29925 100755
--- a/java/src/main/java/ai/rapids/cudf/Cuda.java
+++ b/java/src/main/java/ai/rapids/cudf/Cuda.java
@@ -388,7 +388,7 @@ static void asyncMemcpy(long dst, long src, long count, CudaMemcpyKind kind) {
 
   /**
    * Gets the major CUDA compute capability of the current device.
-   * 
+   *
    * For reference: https://developer.nvidia.com/cuda-gpus
    * Hardware Generation	Compute Capability
    *     Ampere	                8.x
@@ -398,15 +398,15 @@ static void asyncMemcpy(long dst, long src, long count, CudaMemcpyKind kind) {
    *     Maxwell                5.x
    *     Kepler	                3.x
    *     Fermi	                2.x
-   * 
+   *
    * @return The Major compute capability version number of the current CUDA device
    * @throws CudaException on any error
    */
-  public static native int getComputeCapabilityMajor() throws CudaException;  
+  public static native int getComputeCapabilityMajor() throws CudaException;
 
   /**
    * Gets the minor CUDA compute capability of the current device.
-   * 
+   *
    * For reference: https://developer.nvidia.com/cuda-gpus
    * Hardware Generation	Compute Capability
    *     Ampere	                8.x
@@ -416,7 +416,7 @@ static void asyncMemcpy(long dst, long src, long count, CudaMemcpyKind kind) {
    *     Maxwell                5.x
    *     Kepler	                3.x
    *     Fermi	                2.x
-   * 
+   *
    * @return The Minor compute capability version number of the current CUDA device
    * @throws CudaException on any error
    */
diff --git a/java/src/main/java/ai/rapids/cudf/NvtxRange.java b/java/src/main/java/ai/rapids/cudf/NvtxRange.java
index 813ab5e66cf..c683139f775 100644
--- a/java/src/main/java/ai/rapids/cudf/NvtxRange.java
+++ b/java/src/main/java/ai/rapids/cudf/NvtxRange.java
@@ -34,7 +34,7 @@
  * Instances should be associated with a single thread to avoid pushing an NVTX range in
  * one thread and then trying to pop the range in a different thread.
  *
- * Push/pop ranges show a stacking behavior in tools such as Nsight, where newly pushed 
+ * Push/pop ranges show a stacking behavior in tools such as Nsight, where newly pushed
  * ranges are correlated and enclosed by the prior pushed range (in the example above,
  * "b" is enclosed by "a").
  */
diff --git a/java/src/main/java/ai/rapids/cudf/OutOfBoundsPolicy.java b/java/src/main/java/ai/rapids/cudf/OutOfBoundsPolicy.java
index 36f39aa8ad3..262d7f1f2c3 100644
--- a/java/src/main/java/ai/rapids/cudf/OutOfBoundsPolicy.java
+++ b/java/src/main/java/ai/rapids/cudf/OutOfBoundsPolicy.java
@@ -32,7 +32,7 @@
  */
 public enum OutOfBoundsPolicy {
   /* Output values corresponding to out-of-bounds indices are null */
-  NULLIFY,  
+  NULLIFY,
 
   /* No bounds checking is performed, better performance */
   DONT_CHECK
diff --git a/java/src/main/java/ai/rapids/cudf/RollingAggregation.java b/java/src/main/java/ai/rapids/cudf/RollingAggregation.java
index 718e17c7f5c..a460bd46ab4 100644
--- a/java/src/main/java/ai/rapids/cudf/RollingAggregation.java
+++ b/java/src/main/java/ai/rapids/cudf/RollingAggregation.java
@@ -220,7 +220,7 @@ public static RollingAggregation collectSet(NullPolicy nullPolicy, NullEquality
 
   /**
    * Select the nth element from a specified window.
-   * 
+   *
    * @param n          Indicates the index of the element to be selected from the window
    * @param nullPolicy Indicates whether null elements are to be skipped, or not
    */
diff --git a/java/src/main/native/.clang-format b/java/src/main/native/.clang-format
index 2b6bd9a27bb..e0866533a36 100644
--- a/java/src/main/native/.clang-format
+++ b/java/src/main/native/.clang-format
@@ -8,12 +8,12 @@ AlignAfterOpenBracket: Align
 # int aaaa = 12;
 # int b    = 23;
 # int ccc  = 23;
-# leaving OFF 
+# leaving OFF
 AlignConsecutiveAssignments: false
 # int         aaaa = 12;
 # float       b = 23;
 # std::string ccc = 23;
-# leaving OFF 
+# leaving OFF
 AlignConsecutiveDeclarations: false
 ##define A                                                                      \
 #  int aaaa;                                                                    \
@@ -59,14 +59,14 @@ AlwaysBreakAfterReturnType: None
 AlwaysBreakBeforeMultilineStrings: false
 AlwaysBreakTemplateDeclarations: MultiLine
 
-# if all the arguments for a function don't fit in a single line, 
+# if all the arguments for a function don't fit in a single line,
 # with a value of "false", it'll split each argument into different lines
 BinPackArguments: true
 BinPackParameters: true
 
 # if this is set to Custom, the BraceWrapping flags apply
 BreakBeforeBraces: Custom
-BraceWrapping:   
+BraceWrapping:
   AfterClass:      false
   AfterControlStatement: false
   AfterEnum:       false
@@ -129,12 +129,12 @@ DisableFormat:   false
 ExperimentalAutoDetectBinPacking: false
 # } // namespace a => useful
 FixNamespaceComments: true
-ForEachMacros:   
+ForEachMacros:
   - foreach
   - Q_FOREACH
   - BOOST_FOREACH
 IncludeBlocks:   Regroup
-IncludeCategories: 
+IncludeCategories:
   - Regex:           '<[[:alnum:]]+>'
     Priority:        0
   - Regex:           '<[[:alnum:].]+>'
@@ -146,7 +146,7 @@ IncludeCategories:
   - Regex:           '.*'
     Priority:        4
 # if a header matches this in an include group, it will be moved up to the
-# top of the group. 
+# top of the group.
 IncludeIsMainRegex: '(Test)?$'
 IndentCaseLabels: true
 IndentPPDirectives: None
diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java
index f564a55463b..2a33c37a8d6 100644
--- a/java/src/test/java/ai/rapids/cudf/TableTest.java
+++ b/java/src/test/java/ai/rapids/cudf/TableTest.java
@@ -5787,8 +5787,8 @@ private static Scalar getDecimalScalarRangeBounds(int scale, int unscaledValue,
       case 2: return Scalar.fromDecimal(scale, unscaledValue);
       case 3: return Scalar.fromDecimal(scale, Long.valueOf(unscaledValue));
       case 4: return Scalar.fromDecimal(scale, big(unscaledValue));
-      default: 
-        throw new IllegalStateException("Unexpected order by column index: " 
+      default:
+        throw new IllegalStateException("Unexpected order by column index: "
                                         + orderby_col_idx);
     }
   }
@@ -5798,11 +5798,11 @@ void testRangeWindowsWithDecimalOrderBy() {
     try (Table unsorted = new Table.TestBuilder()
         .column(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) // GBY Key
         .column(1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3) // GBY Key
-        .decimal32Column(-1, 4000, 3000, 2000, 1000, 
-                             4000, 3000, 2000, 1000, 
+        .decimal32Column(-1, 4000, 3000, 2000, 1000,
+                             4000, 3000, 2000, 1000,
                              4000, 3000, 2000, 1000) // Decimal OBY Key
-        .decimal64Column(-1, 4000l, 3000l, 2000l, 1000l, 
-                             4000l, 3000l, 2000l, 1000l, 
+        .decimal64Column(-1, 4000l, 3000l, 2000l, 1000l,
+                             4000l, 3000l, 2000l, 1000l,
                              4000l, 3000l, 2000l, 1000l) // Decimal OBY Key
         .decimal128Column(-1, RoundingMode.UNNECESSARY,
                               big(4000), big(3000), big(2000), big(1000),
@@ -5811,13 +5811,13 @@ void testRangeWindowsWithDecimalOrderBy() {
         .column(9, 1, 5, 7, 2, 8, 9, 7, 6, 6, 0, 8) // Agg Column
         .build()) {
 
-      // Columns 2,3,4 are decimal order-by columns of type DECIMAL32, DECIMAL64, 
+      // Columns 2,3,4 are decimal order-by columns of type DECIMAL32, DECIMAL64,
       // and DECIMAL128 respectively, with similarly ordered values.
       // In the following loop, each decimal type is tested as the order-by column,
       // producing the same results with similar range bounds.
       for (int decimal_oby_col_idx = 2; decimal_oby_col_idx <= 4; ++decimal_oby_col_idx) {
-        try (Table sorted = unsorted.orderBy(OrderByArg.asc(0), 
-                                             OrderByArg.asc(1), 
+        try (Table sorted = unsorted.orderBy(OrderByArg.asc(0),
+                                             OrderByArg.asc(1),
                                              OrderByArg.asc(decimal_oby_col_idx));
             ColumnVector expectSortedAggColumn = ColumnVector.fromBoxedInts(7, 5, 1, 9, 7, 9, 8, 2, 8, 0, 6, 6)) {
           ColumnVector sortedAggColumn = sorted.getColumn(5);
diff --git a/java/src/test/java/ai/rapids/cudf/TimestampColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/TimestampColumnVectorTest.java
index 9a929cec98d..c22acac747e 100644
--- a/java/src/test/java/ai/rapids/cudf/TimestampColumnVectorTest.java
+++ b/java/src/test/java/ai/rapids/cudf/TimestampColumnVectorTest.java
@@ -30,7 +30,7 @@ public class TimestampColumnVectorTest extends CudfTestBase {
                                   17716,    //2018-07-04
                                   19382,    //2023-01-25
                                   -1528,    //1965-10-26
-                                  17716};   //2018-07-04 
+                                  17716};   //2018-07-04
 
   static final long[] TIMES_S = {-131968728L,   //'1965-10-26 14:01:12' Tuesday
                                  1530705600L,   //'2018-07-04 12:00:00' Wednesday
diff --git a/python/cudf/cudf/_fuzz_testing/tests/readme.md b/python/cudf/cudf/_fuzz_testing/tests/readme.md
index 3e30aa8c924..f9ef1119a21 100644
--- a/python/cudf/cudf/_fuzz_testing/tests/readme.md
+++ b/python/cudf/cudf/_fuzz_testing/tests/readme.md
@@ -7,7 +7,7 @@ This directory contains all the Fuzz tests for cudf library.
 
 1. Add a Data Handler class which actually generates the necessary random data according to your requirements. This class should be added in `cudf/cudf/testing/`. A sample data handler class is: `CSVWriter`: https://github.com/rapidsai/cudf/blob/branch-0.16/python/cudf/cudf/testing/csv.py
 2. Data Handlers are registered by the `pythonfuzz` decorator. At runtime, the Fuzzer will continuously run registered fuzz tests.
-  
+
 ```python
 from cudf.testing.csv import CSVWriter
 
@@ -37,7 +37,7 @@ python write_csv.py csv_writer_test
 
 ## Tips to run specific crash file/files
 
-Using the `pythonfuzz` decorator pass in `regression=True` with `dirs` having list of directories 
+Using the `pythonfuzz` decorator pass in `regression=True` with `dirs` having list of directories
 ```python
 @pythonfuzz(data_handle=CSVWriter, regression=True, dir=["/cudf/python/cudf/cudf/_fuzz_testing"])
 ```
@@ -52,7 +52,7 @@ and passed to the `your_custom_fuzz_test`.
 If a parameter value depends the kind of input generated by the `data_handle`(in this case `CSVReader`),
 then you can assign `ALL_POSSIBLE_VALUES` constant to it. This constant is used as an identifier by the
 `data_handle` to generate random parameter values for that specific parameter purely based on data.
-To perform this customization `set_rand_params` should be implemented as shown in the below example. 
+To perform this customization `set_rand_params` should be implemented as shown in the below example.
 ```python
 from cudf._fuzz_testing.main import pythonfuzz
 from cudf._fuzz_testing.utils import ALL_POSSIBLE_VALUES
diff --git a/python/custreamz/README.md b/python/custreamz/README.md
index 6b105c9ea4a..99ada746ec8 100644
--- a/python/custreamz/README.md
+++ b/python/custreamz/README.md
@@ -66,4 +66,4 @@ Nightly:
 conda install -c rapidsai-nightly cudf_kafka custreamz
 ```
 
-See the [Get RAPIDS version picker](https://rapids.ai/start.html) for more OS and version info. 
+See the [Get RAPIDS version picker](https://rapids.ai/start.html) for more OS and version info.

From 5ace809af633624223a4a925f2647ab5c0a8aff1 Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Wed, 2 Nov 2022 08:32:45 -0400
Subject: [PATCH 111/202] Add strings udf C++ classes and functions for phase
 II (#11912)

Adds the C++ classes and functions for the phase II of strings udf. This specifically includes the device side string class which can be used for building udfs that create or modify strings.
Also included are some basic helper functions for split, strip, case, and numeric conversion.

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Vyas Ramasubramani (https://github.com/vyasr)
  - https://github.com/brandon-b-miller

URL: https://github.com/rapidsai/cudf/pull/11912
---
 .../cudf/strings/detail/char_tables.hpp       |   1 +
 cpp/include/cudf/strings/string_view.hpp      |   2 +
 .../cpp/include/cudf/strings/udf/case.cuh     | 211 +++++++
 .../cpp/include/cudf/strings/udf/numeric.cuh  |  72 +++
 .../cpp/include/cudf/strings/udf/pad.cuh      |  72 +++
 .../cpp/include/cudf/strings/udf/split.cuh    | 282 +++++++++
 .../cpp/include/cudf/strings/udf/strip.cuh    |  80 +++
 .../cpp/include/cudf/strings/udf/udf_apis.hpp |  36 +-
 .../include/cudf/strings/udf/udf_string.cuh   | 457 +++++++++++++++
 .../include/cudf/strings/udf/udf_string.hpp   | 550 ++++++++++++++++++
 .../cpp/src/strings/udf/udf_apis.cu           |  74 +++
 11 files changed, 1836 insertions(+), 1 deletion(-)
 create mode 100644 python/strings_udf/cpp/include/cudf/strings/udf/case.cuh
 create mode 100644 python/strings_udf/cpp/include/cudf/strings/udf/numeric.cuh
 create mode 100644 python/strings_udf/cpp/include/cudf/strings/udf/pad.cuh
 create mode 100644 python/strings_udf/cpp/include/cudf/strings/udf/split.cuh
 create mode 100644 python/strings_udf/cpp/include/cudf/strings/udf/strip.cuh
 create mode 100644 python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh
 create mode 100644 python/strings_udf/cpp/include/cudf/strings/udf/udf_string.hpp

diff --git a/cpp/include/cudf/strings/detail/char_tables.hpp b/cpp/include/cudf/strings/detail/char_tables.hpp
index 4ea7e3ee952..275b7223a3b 100644
--- a/cpp/include/cudf/strings/detail/char_tables.hpp
+++ b/cpp/include/cudf/strings/detail/char_tables.hpp
@@ -46,6 +46,7 @@ constexpr uint8_t IS_LOWER(uint8_t x) { return ((x) & (1 << 6)); }
 constexpr uint8_t IS_SPECIAL(uint8_t x) { return ((x) & (1 << 7)); }
 constexpr uint8_t IS_ALPHANUM(uint8_t x) { return ((x) & (0x0F)); }
 constexpr uint8_t IS_UPPER_OR_LOWER(uint8_t x) { return ((x) & ((1 << 5) | (1 << 6))); }
+constexpr uint8_t ALL_FLAGS = 0xFF;
 
 // Type for the character cases table.
 using character_cases_table_type = uint16_t;
diff --git a/cpp/include/cudf/strings/string_view.hpp b/cpp/include/cudf/strings/string_view.hpp
index 03bf538b1b2..265adc60392 100644
--- a/cpp/include/cudf/strings/string_view.hpp
+++ b/cpp/include/cudf/strings/string_view.hpp
@@ -17,6 +17,8 @@
 
 #include <cudf/types.hpp>
 
+#include <cuda_runtime.h>
+
 #include <iterator>
 
 /**
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/case.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/case.cuh
new file mode 100644
index 00000000000..472101959a6
--- /dev/null
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/case.cuh
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include "udf_string.cuh"
+
+#include <cudf/strings/detail/char_tables.hpp>
+#include <cudf/strings/detail/utf8.hpp>
+#include <cudf/strings/string_view.cuh>
+
+namespace cudf {
+namespace strings {
+namespace udf {
+
+/**
+ * @brief Pointers to the tables used for character-type flags and case conversion
+ */
+struct chars_tables {
+  cudf::strings::detail::character_flags_table_type* flags_table;
+  cudf::strings::detail::character_cases_table_type* cases_table;
+  struct cudf::strings::detail::special_case_mapping* special_case_mapping_table;
+};
+
+namespace detail {
+
+/**
+ * @brief Appends the case-converted form of one character to a string
+ *
+ * Characters flagged as special may convert to more than one output character.
+ *
+ * @param tables The char tables required for conversion
+ * @param result String that receives the converted character(s)
+ * @param code_point The code-point of the character to convert
+ * @param flag The char-type flag of the character to convert
+ */
+__device__ inline void convert_char(chars_tables const tables,
+                                    udf_string& result,
+                                    uint32_t code_point,
+                                    uint8_t flag)
+{
+  namespace csd = cudf::strings::detail;
+  if (!csd::IS_SPECIAL(flag)) {
+    result.append(csd::codepoint_to_utf8(tables.cases_table[code_point]));
+    return;
+  }
+
+  // special characters: look up the mapping entry, then emit every character it lists
+  auto const hash_idx   = csd::get_special_case_hash_index(code_point);
+  auto const entry      = tables.special_case_mapping_table[hash_idx];
+  bool const from_lower = csd::IS_LOWER(flag);
+  auto const out_count  = from_lower ? entry.num_upper_chars : entry.num_lower_chars;
+  auto const* out_chars = from_lower ? entry.upper : entry.lower;
+  for (uint16_t pos = 0; pos < out_count; ++pos) {
+    result.append(csd::codepoint_to_utf8(out_chars[pos]));
+  }
+}
+
+/**
+ * @brief Converts the given string to either upper or lower case
+ *
+ * @param tables The char tables required for conversion
+ * @param d_str Input string to convert
+ * @param case_flag Flag bits selecting which characters get converted (upper, lower or both)
+ * @return New string containing the converted characters
+ */
+__device__ inline udf_string convert_case(
+  chars_tables const tables,
+  string_view d_str,
+  cudf::strings::detail::character_flags_table_type case_flag)
+{
+  udf_string result;
+  for (auto const chr : d_str) {
+    auto const code_point = cudf::strings::detail::utf8_to_codepoint(chr);
+    auto const flag       = code_point <= 0x00FFFF ? tables.flags_table[code_point] : 0;
+    // convert on a case_flag match, or when flagged special but neither upper nor lower
+    if ((flag & case_flag) || (cudf::strings::detail::IS_SPECIAL(flag) &&
+                               !cudf::strings::detail::IS_UPPER_OR_LOWER(flag))) {
+      convert_char(tables, result, code_point, flag);
+    } else {
+      result.append(chr);
+    }
+  }
+
+  return result;
+}
+
+/**
+ * @brief Shared implementation for the capitalize and title functions
+ *
+ * @tparam CapitalizeNextFn returns true if the next candidate character should be capitalized
+ * @param tables The char tables required for conversion
+ * @param d_str Input string to convert
+ * @param next_fn Called with each character's flag to decide if the next one is capitalized
+ * @return New string containing the converted characters
+ */
+template <typename CapitalizeNextFn>
+__device__ inline udf_string capitalize(chars_tables const tables,
+                                        string_view d_str,
+                                        CapitalizeNextFn next_fn)
+{
+  udf_string output;
+  bool upper_next = true;  // the first character is always a capitalization candidate
+  for (auto const chr : d_str) {
+    auto const code_point = cudf::strings::detail::utf8_to_codepoint(chr);
+    auto const flag       = code_point <= 0x00FFFF ? tables.flags_table[code_point] : 0;
+    bool const needs_conversion = upper_next ? cudf::strings::detail::IS_LOWER(flag)
+                                             : cudf::strings::detail::IS_UPPER(flag);
+    if (!needs_conversion) {
+      output.append(chr);
+    } else {
+      detail::convert_char(tables, output, code_point, flag);
+    }
+    upper_next = next_fn(flag);
+  }
+  return output;
+}
+}  // namespace detail
+
+/**
+ * @brief Creates a lower-case copy of the given string
+ *
+ * @param tables The char tables required for conversion
+ * @param d_str Input string to convert
+ * @return New string containing the converted characters
+ */
+__device__ inline udf_string to_lower(chars_tables const tables, string_view d_str)
+{
+  // selecting only the upper-case flag bit means only upper-case characters are converted
+  auto const upper_only = cudf::strings::detail::IS_UPPER(cudf::strings::detail::ALL_FLAGS);
+  return detail::convert_case(tables, d_str, upper_only);
+}
+
+/**
+ * @brief Creates an upper-case copy of the given string
+ *
+ * @param tables The char tables required for conversion
+ * @param d_str Input string to convert
+ * @return New string containing the converted characters
+ */
+__device__ inline udf_string to_upper(chars_tables const tables, string_view d_str)
+{
+  // selecting only the lower-case flag bit means only lower-case characters are converted
+  auto const lower_only = cudf::strings::detail::IS_LOWER(cudf::strings::detail::ALL_FLAGS);
+  return detail::convert_case(tables, d_str, lower_only);
+}
+
+/**
+ * @brief Creates a copy of the given string with the case of its characters swapped
+ *
+ * Lower case characters become upper case and
+ * upper case characters become lower case.
+ *
+ * @param tables The char tables required for conversion
+ * @param d_str Input string to convert
+ * @return New string containing the converted characters
+ */
+__device__ inline udf_string swap_case(chars_tables const tables, string_view d_str)
+{
+  // select both case bits so characters of either case are converted
+  auto const lower_bit = cudf::strings::detail::IS_LOWER(cudf::strings::detail::ALL_FLAGS);
+  auto const upper_bit = cudf::strings::detail::IS_UPPER(cudf::strings::detail::ALL_FLAGS);
+  return detail::convert_case(tables, d_str, lower_bit | upper_bit);
+}
+
+/**
+ * @brief Upper-cases the first character of the given string and lower-cases the rest
+ *
+ * @param tables The char tables required for conversion
+ * @param d_str Input string to convert
+ * @return New string containing the converted characters
+ */
+__device__ inline udf_string capitalize(chars_tables const tables, string_view d_str)
+{
+  return detail::capitalize(
+    tables, d_str, [](cudf::strings::detail::character_flags_table_type) { return false; });
+}
+
+/**
+ * @brief Creates a title-case copy of the given string
+ *
+ * The first character after a non-alphabetic character is converted to upper case.
+ * All other characters are converted to lower case.
+ *
+ * @param tables The char tables required for conversion
+ * @param d_str Input string to convert
+ * @return New string containing the converted characters
+ */
+__device__ inline udf_string title(chars_tables const tables, string_view d_str)
+{
+  return detail::capitalize(
+    tables, d_str, [](cudf::strings::detail::character_flags_table_type flag) -> bool {
+      return !cudf::strings::detail::IS_ALPHA(flag);
+    });
+}
+
+}  // namespace udf
+}  // namespace strings
+}  // namespace cudf
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/numeric.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/numeric.cuh
new file mode 100644
index 00000000000..c8c9f6e46f4
--- /dev/null
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/numeric.cuh
@@ -0,0 +1,72 @@
+
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include "udf_string.cuh"
+
+#include <cudf/strings/detail/convert/int_to_string.cuh>
+#include <cudf/strings/detail/convert/string_to_float.cuh>
+#include <cudf/strings/detail/convert/string_to_int.cuh>
+
+namespace cudf {
+namespace strings {
+namespace udf {
+
+/**
+ * @brief Converts a string into an integer
+ *
+ * A leading '+' or '-' is allowed; otherwise only base-10 [0-9] characters are expected.
+ * Any other character will end the parse. Overflow of the int64 type is not detected.
+ * @param d_str String to convert
+ * @return Integer value parsed from `d_str`
+ */
+__device__ inline int64_t stoi(string_view const& d_str)
+{
+  return cudf::strings::detail::string_to_integer(d_str);
+}
+
+/**
+ * @brief Converts an integer into a string
+ * @param value integer value to convert
+ * @return New string holding the base-10 text of `value`
+ */
+__device__ inline udf_string to_string(int64_t value)
+{
+  udf_string digits;
+  if (value != 0) {
+    digits.resize(cudf::strings::detail::count_digits(value));
+    cudf::strings::detail::integer_to_string(value, digits.data());
+  } else {
+    digits.append("0");
+  }
+  return digits;
+}
+
+/**
+ * @brief Converts a string into a double
+ * @details Supports scientific notation; overflow goes to inf or -inf, underflow may go to 0.
+ * @param d_str String to convert
+ * @return Double value parsed from `d_str`
+ */
+__device__ inline double stod(string_view const& d_str)
+{
+  return cudf::strings::detail::stod(d_str);
+}
+
+}  // namespace udf
+}  // namespace strings
+}  // namespace cudf
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/pad.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/pad.cuh
new file mode 100644
index 00000000000..d6d4ed637e9
--- /dev/null
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/pad.cuh
@@ -0,0 +1,72 @@
+
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include "udf_string.cuh"
+
+#include <cudf/strings/detail/pad_impl.cuh>
+
+namespace cudf {
+namespace strings {
+namespace udf {
+
+/**
+ * @brief Pad beginning and/or end of a string with the given fill character
+ *
+ * The side_type::BOTH will attempt to center the text using the `fill_char`.
+ * If `width <= d_str.length()` or `fill_char` is empty, a copy of `d_str` is returned.
+ * @tparam side Specify where the padding should occur
+ * @param d_str String to pad
+ * @param width Minimum length in characters of the output string
+ * @param fill_char Character used for padding; only its first character is written
+ * @return New string containing the padded text
+ */
+template <side_type side = side_type::RIGHT>
+__device__ udf_string pad(cudf::string_view const d_str,
+                          cudf::size_type width,
+                          cudf::string_view fill_char = cudf::string_view{" ", 1})
+{
+  if (fill_char.empty()) { return udf_string{d_str}; }
+  // size the output first, then let pad_impl write the padded text into its buffer
+  udf_string result;
+  result.resize(cudf::strings::detail::compute_padded_size(d_str, width, fill_char.size_bytes()));
+  cudf::strings::detail::pad_impl<side>(d_str, width, *fill_char.begin(), result.data());
+  return result;
+}
+
+/**
+ * @brief Pad beginning of a string with zero '0'
+ *
+ * If the `width` is smaller than the length of `d_str` no change occurs.
+ *
+ * If `d_str` starts with a sign character ('-' or '+') then '0' padding
+ * starts after the sign.
+ * @param d_str String to fill
+ * @param width Minimum length in characters of the output string (including the sign character)
+ * @return New string containing the zero-filled text
+ */
+__device__ inline udf_string zfill(cudf::string_view const d_str, cudf::size_type width)
+{
+  udf_string result;
+  result.resize(cudf::strings::detail::compute_padded_size(d_str, width, 1));
+  cudf::strings::detail::zfill_impl(d_str, width, result.data());
+  return result;
+}
+
+}  // namespace udf
+}  // namespace strings
+}  // namespace cudf
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/split.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/split.cuh
new file mode 100644
index 00000000000..ca31425aa62
--- /dev/null
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/split.cuh
@@ -0,0 +1,282 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "udf_string.cuh"
+
+#include <cudf/strings/detail/split_utils.cuh>
+#include <cudf/strings/string_view.cuh>
+
+namespace cudf {
+namespace strings {
+namespace udf {
+namespace detail {
+
+/**
+ * @brief Split string using given string
+ *
+ * The caller must allocate an array of cudf::string_view to be filled
+ * in by this function. This function can be called with a `result=nullptr`
+ * to compute the number of tokens.
+ *
+ * @code{.cpp}
+ * auto d_str = cudf::string_view{"the best  of times ", 19};
+ * auto tgt = cudf::string_view{" ", 1};
+ * auto token_count = split(d_str, tgt, nullptr);
+ * auto result = new cudf::string_view[token_count];
+ * split(d_str, tgt, result);
+ * // result is array like ["the", "best", "", "of", "times", ""]
+ * @endcode
+ *
+ * @param d_str String to split
+ * @param tgt String to split on
+ * @param result Empty array to populate with output objects.
+ *               Pass `nullptr` to just get the token count.
+ * @return Number of tokens returned
+ */
+__device__ inline cudf::size_type split(cudf::string_view const d_str,
+                                        cudf::string_view const tgt,
+                                        cudf::string_view* result)
+{
+  auto const nchars     = d_str.length();
+  cudf::size_type count = 0;
+  // each pass emits the token starting at last_pos and then steps past the delimiter
+  cudf::size_type last_pos = 0;
+  while (last_pos <= nchars) {
+    cudf::size_type const pos = d_str.find(tgt, last_pos);
+    auto const length         = (pos < 0 ? nchars : pos) - last_pos;
+    if (result) { *result++ = d_str.substr(last_pos, length); }
+    last_pos = pos + tgt.length();
+    ++count;
+    if (pos < 0) { break; }  // negative pos: the token just counted ran to the end of d_str
+  }
+
+  return count;
+}
+}  // namespace detail
+
+/**
+ * @brief Count tokens in a string without performing the split
+ *
+ * @code{.cpp}
+ * auto d_str = cudf::string_view{"the best  of times ", 19};
+ * auto tgt = cudf::string_view{" ", 1};
+ * auto token_count = count_tokens(d_str, tgt);
+ * // token_count is 6
+ * @endcode
+ *
+ * @param d_str String to split
+ * @param tgt String to split on
+ * @return Number of tokens returned
+ */
+__device__ inline cudf::size_type count_tokens(cudf::string_view const d_str,
+                                               cudf::string_view const tgt)
+{
+  return detail::split(d_str, tgt, nullptr);  // null result: count only, nothing is written
+}
+
+/**
+ * @brief Split string using given string
+ *
+ * The caller must allocate an array of cudf::string_view to be filled
+ * in by this function.
+ *
+ * @code{.cpp}
+ * auto d_str = cudf::string_view{"the best  of times ", 19};
+ * auto tgt = cudf::string_view{" ", 1};
+ * auto token_count = count_tokens(d_str, tgt);
+ * auto result = new cudf::string_view[token_count];
+ * split(d_str, tgt, result);
+ * // result is array like ["the", "best", "", "of", "times", ""]
+ * @endcode
+ *
+ * @param d_str String to split
+ * @param tgt String to split on
+ * @param result Empty array to populate with output objects.
+ * @return Number of tokens returned
+ */
+__device__ inline cudf::size_type split(cudf::string_view const d_str,
+                                        cudf::string_view const tgt,
+                                        cudf::string_view* result)
+{
+  return detail::split(d_str, tgt, result);  // public entry point for detail::split
+}
+
+/**
+ * @brief Split string using given target array
+ *
+ * @param d_str String to split
+ * @param tgt Character array encoded in UTF-8 used for identifying split points
+ * @param bytes Number of bytes to read from `tgt`
+ * @param result Empty array to populate with output objects
+ * @return Number of tokens returned
+ */
+__device__ inline cudf::size_type split(cudf::string_view const d_str,
+                                        char const* tgt,
+                                        cudf::size_type bytes,
+                                        cudf::string_view* result)
+{
+  return detail::split(d_str, cudf::string_view{tgt, bytes}, result);  // view over tgt's bytes
+}
+
+/**
+ * @brief Split string using given null-terminated target array
+ *
+ * @param d_str String to split
+ * @param tgt Null-terminated character array encoded in UTF-8 used for identifying split points
+ * @param result Empty array to populate with output objects
+ * @return Number of tokens returned
+ */
+__device__ inline cudf::size_type split(cudf::string_view const d_str,
+                                        char const* tgt,
+                                        cudf::string_view* result)
+{
+  return split(d_str, tgt, detail::bytes_in_null_terminated_string(tgt), result);
+}
+
+namespace detail {
+/**
+ * @brief Split string on whitespace
+ *
+ * The caller must allocate an array of cudf::string_view to be filled
+ * in by this function. This function can be called with a `result=nullptr`
+ * to compute the number of tokens.
+ *
+ * @code{.cpp}
+ * auto d_str = cudf::string_view{"the best  of times ", 19};
+ * auto token_count = split(d_str, nullptr);
+ * auto result = new cudf::string_view[token_count];
+ * split(d_str, result);
+ * // result is array like ["the", "best", "of", "times"]
+ * @endcode
+ *
+ * @param d_str String to split
+ * @param result Empty array to populate with output objects.
+ *               Pass `nullptr` to just get the token count.
+ * @return Number of tokens returned
+ */
+__device__ inline cudf::size_type split(cudf::string_view const d_str, cudf::string_view* result)
+{
+  cudf::strings::detail::whitespace_string_tokenizer tokenizer{d_str};
+  cudf::size_type count = 0;
+  while (tokenizer.next_token()) {
+    auto token = tokenizer.get_token();  // NOTE(review): confirm position units match substr()
+    if (result) { *result++ = d_str.substr(token.first, token.second - token.first); }
+    ++count;
+  }
+  return count;
+}
+}  // namespace detail
+
+/**
+ * @brief Count tokens in a string without performing the split on whitespace
+ *
+ * @code{.cpp}
+ * auto d_str = cudf::string_view{"the best  of times ", 19};
+ * auto token_count = count_tokens(d_str);
+ * // token_count is 4
+ * @endcode
+ *
+ * @param d_str String to split
+ * @return Number of tokens returned
+ */
+__device__ inline cudf::size_type count_tokens(cudf::string_view const d_str)
+{
+  return detail::split(d_str, nullptr);  // null result: count only, nothing is written
+}
+
+/**
+ * @brief Split string on whitespace
+ *
+ * This will create tokens by splitting on one or more consecutive whitespace characters
+ * found in `d_str`.
+ *
+ * @param d_str String to split
+ * @param result Empty array to populate with output objects.
+ * @return Number of tokens returned
+ */
+__device__ inline cudf::size_type split(cudf::string_view const d_str, cudf::string_view* result)
+{
+  return detail::split(d_str, result);  // public entry point for the whitespace tokenizer
+}
+
+/**
+ * @brief Join an array of strings with a separator
+ *
+ * @code{.cpp}
+ * auto separator = cudf::string_view{"::", 2};
+ * cudf::string_view input[] = {
+ *   cudf::string_view{"hello", 5},
+ *   cudf::string_view{"goodbye", 7},
+ *   cudf::string_view{"world", 5} };
+ *
+ * auto result = join(separator, input, 3);
+ * // result is "hello::goodbye::world"
+ * @endcode
+ *
+ * @param separator Separator string
+ * @param input An array of strings to join
+ * @param count Number of elements in `input`
+ * @return New string
+ */
+__device__ inline udf_string join(cudf::string_view const separator,
+                                  cudf::string_view* input,
+                                  cudf::size_type count)
+{
+  udf_string joined{""};
+  for (cudf::size_type idx = 0; idx < count; ++idx) {
+    if (idx > 0) { joined += separator; }  // separator goes between elements only
+    joined += input[idx];
+  }
+  return joined;
+}
+
+/**
+ * @brief Join an array of strings with a separator
+ *
+ * @param separator Character array encoded in UTF-8 used as the separator
+ * @param bytes Number of bytes to read from `separator`
+ * @param input An array of strings to join
+ * @param count Number of elements in `input`
+ * @return New string
+ */
+__device__ inline udf_string join(char const* separator,
+                                  cudf::size_type bytes,
+                                  cudf::string_view* input,
+                                  cudf::size_type count)
+{
+  return join(cudf::string_view{separator, bytes}, input, count);
+}
+
+/**
+ * @brief Join an array of strings with a null-terminated separator
+ *
+ * @param separator Null-terminated UTF-8 string; its bytes are counted up to the terminator
+ * @param input An array of strings to join
+ * @param count Number of elements in `input`
+ * @return New string
+ */
+__device__ inline udf_string join(char const* separator,
+                                  cudf::string_view* input,
+                                  cudf::size_type count)
+{
+  return join(separator, detail::bytes_in_null_terminated_string(separator), input, count);
+}
+
+}  // namespace udf
+}  // namespace strings
+}  // namespace cudf
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/strip.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/strip.cuh
new file mode 100644
index 00000000000..f2db3073460
--- /dev/null
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/strip.cuh
@@ -0,0 +1,80 @@
+
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include "udf_string.cuh"
+
+#include <cudf/strings/detail/strip.cuh>
+#include <cudf/strings/string_view.cuh>
+
+namespace cudf {
+namespace strings {
+namespace udf {
+
+/**
+ * @brief Strip characters from the beginning and/or end of the given string
+ *
+ * The `d_to_strip` is interpreted as an array of characters to be removed.
+ * If `d_to_strip` is an empty string, whitespace characters are stripped.
+ *
+ * @code{.cpp}
+ * auto d_str = cudf::string_view{" aba ", 5};
+ * auto d_to_strip = cudf::string_view{}; // empty string
+ * auto result = strip(d_str, d_to_strip);
+ * // result is "aba"
+ * d_to_strip = cudf::string_view{" a", 2}; // space and 'a'
+ * result = strip(d_str, d_to_strip);
+ * // result is "b" ('a' or ' ' removed from the ends)
+ * @endcode
+ *
+ * @code{.cpp}
+ * auto d_str = cudf::string_view{" aba ", 5};
+ * auto d_to_strip = cudf::string_view{}; // empty string
+ * auto result = strip(d_str, d_to_strip, side_type::LEFT);
+ * // result is "aba "
+ * d_to_strip = cudf::string_view{"a ", 2}; // 'a' and space
+ * result = strip(d_str, d_to_strip, side_type::LEFT);
+ * // result is "ba " ('a' or ' ' removed from the beginning)
+ * @endcode
+ *
+ * @code{.cpp}
+ * auto d_str = cudf::string_view{" aba ", 5};
+ * auto d_to_strip = cudf::string_view{}; // empty string
+ * auto result = strip(d_str, d_to_strip, side_type::RIGHT);
+ * // result is " aba"
+ * d_to_strip = cudf::string_view{" a", 2}; // space and 'a'
+ * result = strip(d_str, d_to_strip, side_type::RIGHT);
+ * // result is " ab" ('a' or ' ' removed from the end)
+ * @endcode
+ *
+ * @param d_str String to strip characters from
+ * @param d_to_strip Characters to remove
+ * @param stype From where to strip the characters;
+ *              Default `BOTH` indicates stripping characters from the
+ *              beginning and the end of the input string `d_str`
+ * @return New string with characters removed
+ */
+__device__ inline udf_string strip(cudf::string_view const d_str,
+                                   cudf::string_view const d_to_strip,
+                                   side_type stype = side_type::BOTH)
+{
+  return udf_string{cudf::strings::detail::strip(d_str, d_to_strip, stype)};
+}
+
+}  // namespace udf
+}  // namespace strings
+}  // namespace cudf
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/udf_apis.hpp b/python/strings_udf/cpp/include/cudf/strings/udf/udf_apis.hpp
index 6de9b91de08..68834afa082 100644
--- a/python/strings_udf/cpp/include/cudf/strings/udf/udf_apis.hpp
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/udf_apis.hpp
@@ -16,6 +16,7 @@
 
 #pragma once
 
+#include <cudf/column/column.hpp>
 #include <cudf/column/column_view.hpp>
 
 #include <rmm/device_buffer.hpp>
@@ -26,14 +27,47 @@ namespace cudf {
 namespace strings {
 namespace udf {
 
+class udf_string;
+
 /**
  * @brief Return a cudf::string_view array for the given strings column
  *
+ * No string data is copied so the input column controls the lifetime of the
+ * underlying strings.
+ *
+ * New device memory is allocated and returned to hold just the string_view instances.
+ *
  * @param input Strings column to convert to a string_view array.
- * @throw cudf::logic_error if input is not a strings column.
+ * @return Array of string_view objects in device memory
  */
 std::unique_ptr<rmm::device_buffer> to_string_view_array(cudf::column_view const input);
 
+/**
+ * @brief Return a STRINGS column given an array of udf_string objects
+ *
+ * This will make a copy of the strings in d_strings in order to build
+ * the output column.
+ * The individual udf_strings are also cleared freeing each of their internal
+ * device memory buffers.
+ *
+ * @param d_strings Pointer to device memory of udf_string objects
+ * @param size The number of elements in the d_strings array
+ * @return A strings column copy of the udf_string objects
+ */
+std::unique_ptr<cudf::column> column_from_udf_string_array(udf_string* d_strings,
+                                                           cudf::size_type size);
+
+/**
+ * @brief Frees an array of udf_string objects
+ *
+ * The individual udf_strings are cleared freeing each of their internal
+ * device memory buffers.
+ *
+ * @param d_strings Pointer to device memory of udf_string objects
+ * @param size The number of elements in the d_strings array
+ */
+void free_udf_string_array(udf_string* d_strings, cudf::size_type size);
+
 }  // namespace udf
 }  // namespace strings
 }  // namespace cudf
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh b/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh
new file mode 100644
index 00000000000..5c9a02a9510
--- /dev/null
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.cuh
@@ -0,0 +1,457 @@
+/*
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "udf_string.hpp"
+
+#include <cudf/strings/detail/utf8.hpp>
+#include <cudf/strings/string_view.cuh>
+
+#include <algorithm>
+#include <limits>
+#include <string>
+
+namespace cudf {
+namespace strings {
+namespace udf {
+namespace detail {
+
+/**
+ * @brief Count the bytes in a null-terminated character array
+ *
+ * @param str Null-terminated string; a nullptr is treated as an empty string
+ * @return Number of bytes in `str` up to but not including the null-terminator
+ */
+__device__ inline static cudf::size_type bytes_in_null_terminated_string(char const* str)
+{
+  if (!str) return 0;
+  cudf::size_type bytes = 0;
+  while (*str++)
+    ++bytes;
+  return bytes;
+}
+
+}  // namespace detail
+
+/**
+ * @brief Allocate memory for strings operation
+ *
+ * @param bytes Number of bytes to allocate (one extra byte is added for a null-terminator)
+ * @return Pointer to allocated memory
+ */
+__device__ inline char* udf_string::allocate(cudf::size_type bytes)
+{
+  char* data  = static_cast<char*>(malloc(bytes + 1));
+  data[bytes] = '\0';  // add null-terminator so we can printf strings in device code
+  return data;
+}
+
+/**
+ * @brief Free memory created by allocate()
+ *
+ * @param data Pointer to allocated memory
+ */
+__device__ inline void udf_string::deallocate(char* data)
+{
+  if (data) free(data);
+}
+
+/**
+ * @brief Reallocate the internal buffer of this string
+ *
+ * Reallocates memory for `m_data` with new size `bytes`
+ * The original data in `m_data` is preserved up to `min(bytes,m_bytes)`
+ * The previous buffer is freed and `m_capacity` is set to `bytes`.
+ *
+ * @param bytes Number of bytes to allocate
+ */
+__device__ void udf_string::reallocate(cudf::size_type bytes)
+{
+  m_capacity    = bytes;
+  auto new_data = allocate(m_capacity);
+  memcpy(new_data, m_data, std::min(m_bytes, bytes));
+  deallocate(m_data);
+  m_data = new_data;
+}
+
+__device__ inline udf_string::udf_string(char const* data, cudf::size_type bytes)
+  : m_bytes(bytes), m_capacity(bytes)
+{
+  m_data = allocate(m_capacity);
+  memcpy(m_data, data, bytes);
+}
+
+__device__ udf_string::udf_string(cudf::size_type count, cudf::char_utf8 chr)
+{
+  if (count <= 0) { return; }
+  m_bytes = m_capacity = cudf::strings::detail::bytes_in_char_utf8(chr) * count;
+  m_data               = allocate(m_capacity);
+  auto out_ptr         = m_data;
+  for (cudf::size_type idx = 0; idx < count; ++idx) {
+    out_ptr += cudf::strings::detail::from_char_utf8(chr, out_ptr);
+  }
+}
+
+__device__ inline udf_string::udf_string(char const* data)
+  : udf_string(data, detail::bytes_in_null_terminated_string(data))
+{
+}
+
+__device__ inline udf_string::udf_string(udf_string const& src)
+  : udf_string(src.m_data, src.m_bytes)
+{
+}
+
+__device__ inline udf_string::udf_string(udf_string&& src) noexcept
+  : m_data(src.m_data), m_bytes(src.m_bytes), m_capacity(src.m_capacity)
+{
+  src.m_data     = nullptr;
+  src.m_bytes    = 0;
+  src.m_capacity = 0;
+}
+
+__device__ inline udf_string::udf_string(cudf::string_view str)
+  : udf_string(str.data(), str.size_bytes())
+{
+}
+
+__device__ inline udf_string::~udf_string() { deallocate(m_data); }
+
+__device__ inline udf_string& udf_string::operator=(udf_string const& str) { return assign(str); }
+
+__device__ inline udf_string& udf_string::operator=(udf_string&& str) noexcept
+{
+  return assign(std::move(str));
+}
+
+__device__ inline udf_string& udf_string::operator=(cudf::string_view str) { return assign(str); }
+
+__device__ inline udf_string& udf_string::operator=(char const* str) { return assign(str); }
+
+__device__ udf_string& udf_string::assign(udf_string&& str) noexcept
+{
+  if (this == &str) { return *this; }
+  deallocate(m_data);  // release the buffer being replaced so it is not leaked
+  m_data      = str.m_data;
+  m_bytes     = str.m_bytes;
+  m_capacity  = str.m_capacity;
+  str.m_data  = nullptr;
+  str.m_bytes = str.m_capacity = 0;  // leave `str` as a valid empty string
+  return *this;
+}
+
+__device__ udf_string& udf_string::assign(cudf::string_view str)
+{
+  return assign(str.data(), str.size_bytes());
+}
+
+__device__ udf_string& udf_string::assign(char const* str)
+{
+  return assign(str, detail::bytes_in_null_terminated_string(str));
+}
+
+__device__ udf_string& udf_string::assign(char const* str, cudf::size_type bytes)
+{
+  // `str` may point into this string's own buffer (e.g. self-assignment), so the
+  // old buffer is only released after its bytes have been copied out.
+  auto const old_data = m_data;
+  if (bytes >= m_capacity) { m_data = allocate(m_capacity = bytes); }
+  memcpy(m_data, str, bytes);
+  if (old_data != m_data) { deallocate(old_data); }
+  m_bytes         = bytes;
+  m_data[m_bytes] = '\0';
+  return *this;
+}
+
+__device__ inline cudf::size_type udf_string::size_bytes() const noexcept { return m_bytes; }
+
+__device__ inline cudf::size_type udf_string::length() const noexcept
+{
+  return cudf::strings::detail::characters_in_string(m_data, m_bytes);
+}
+
+__device__ constexpr cudf::size_type udf_string::max_size() const noexcept
+{
+  return std::numeric_limits<cudf::size_type>::max() - 1;
+}
+
+__device__ inline char* udf_string::data() noexcept { return m_data; }
+
+__device__ inline char const* udf_string::data() const noexcept { return m_data; }
+
+__device__ inline bool udf_string::is_empty() const noexcept { return m_bytes == 0; }
+
+__device__ inline cudf::string_view::const_iterator udf_string::begin() const noexcept
+{
+  return cudf::string_view::const_iterator(cudf::string_view(m_data, m_bytes), 0);
+}
+
+__device__ inline cudf::string_view::const_iterator udf_string::end() const noexcept
+{
+  return cudf::string_view::const_iterator(cudf::string_view(m_data, m_bytes), length());
+}
+
+__device__ inline cudf::char_utf8 udf_string::at(cudf::size_type pos) const
+{
+  auto const offset = byte_offset(pos);
+  auto chr          = cudf::char_utf8{0};
+  if (offset < m_bytes) { cudf::strings::detail::to_char_utf8(data() + offset, chr); }
+  return chr;
+}
+
+__device__ inline cudf::char_utf8 udf_string::operator[](cudf::size_type pos) const
+{
+  return at(pos);
+}
+
+__device__ inline cudf::size_type udf_string::byte_offset(cudf::size_type pos) const
+{
+  cudf::size_type offset = 0;
+
+  auto start = m_data;
+  auto end   = start + m_bytes;
+  while ((pos > 0) && (start < end)) {
+    auto const byte       = static_cast<uint8_t>(*start++);
+    auto const char_bytes = cudf::strings::detail::bytes_in_utf8_byte(byte);
+    if (char_bytes) { --pos; }
+    offset += char_bytes;
+  }
+  return offset;
+}
+
+__device__ inline int udf_string::compare(cudf::string_view in) const noexcept
+{
+  return compare(in.data(), in.size_bytes());
+}
+
+__device__ inline int udf_string::compare(char const* data, cudf::size_type bytes) const
+{
+  auto const view = static_cast<cudf::string_view>(*this);
+  return view.compare(data, bytes);
+}
+
+__device__ inline bool udf_string::operator==(cudf::string_view rhs) const noexcept
+{
+  return m_bytes == rhs.size_bytes() && compare(rhs) == 0;
+}
+
+__device__ inline bool udf_string::operator!=(cudf::string_view rhs) const noexcept
+{
+  return compare(rhs) != 0;
+}
+
+__device__ inline bool udf_string::operator<(cudf::string_view rhs) const noexcept
+{
+  return compare(rhs) < 0;
+}
+
+__device__ inline bool udf_string::operator>(cudf::string_view rhs) const noexcept
+{
+  return compare(rhs) > 0;
+}
+
+__device__ inline bool udf_string::operator<=(cudf::string_view rhs) const noexcept
+{
+  return compare(rhs) <= 0;
+}
+
+__device__ inline bool udf_string::operator>=(cudf::string_view rhs) const noexcept
+{
+  return compare(rhs) >= 0;
+}
+
+__device__ inline void udf_string::clear() noexcept
+{
+  deallocate(m_data);
+  m_data     = nullptr;
+  m_bytes    = 0;
+  m_capacity = 0;
+}
+
+__device__ inline void udf_string::resize(cudf::size_type count)
+{
+  if (count < 0 || count > max_size()) { return; }
+  // also allocate when there is no buffer yet so the terminator write below is valid
+  if (count > m_capacity || m_data == nullptr) { reallocate(count); }
+  // add padding if necessary (null chars)
+  if (count > m_bytes) { memset(m_data + m_bytes, 0, count - m_bytes); }
+
+  m_bytes         = count;
+  m_data[m_bytes] = '\0';
+}
+
+__device__ void udf_string::reserve(cudf::size_type count)
+{
+  if (count < max_size() && count > m_capacity) { reallocate(count); }
+}
+
+__device__ cudf::size_type udf_string::capacity() const noexcept { return m_capacity; }
+
+__device__ void udf_string::shrink_to_fit()
+{
+  if (m_bytes < m_capacity) { reallocate(m_bytes); }
+}
+
+__device__ inline udf_string& udf_string::append(char const* str, cudf::size_type bytes)
+{
+  if (bytes <= 0) { return *this; }
+  auto const nbytes = m_bytes + bytes;
+  if (nbytes > m_capacity) { reallocate(2 * nbytes); }
+  memcpy(m_data + m_bytes, str, bytes);
+  m_bytes         = nbytes;
+  m_data[m_bytes] = '\0';
+  return *this;
+}
+
+__device__ inline udf_string& udf_string::append(char const* str)
+{
+  return append(str, detail::bytes_in_null_terminated_string(str));
+}
+
+__device__ inline udf_string& udf_string::append(cudf::char_utf8 chr, cudf::size_type count)
+{
+  auto d_str = udf_string(count, chr);
+  return append(d_str);
+}
+
+__device__ inline udf_string& udf_string::append(cudf::string_view in)
+{
+  return append(in.data(), in.size_bytes());
+}
+
+__device__ inline udf_string& udf_string::operator+=(cudf::string_view in) { return append(in); }
+
+__device__ inline udf_string& udf_string::operator+=(cudf::char_utf8 chr) { return append(chr); }
+
+__device__ inline udf_string& udf_string::operator+=(char const* str) { return append(str); }
+
+__device__ inline udf_string& udf_string::insert(cudf::size_type pos,
+                                                 char const* str,
+                                                 cudf::size_type in_bytes)
+{
+  return replace(pos, 0, str, in_bytes);
+}
+
+__device__ inline udf_string& udf_string::insert(cudf::size_type pos, char const* str)
+{
+  return insert(pos, str, detail::bytes_in_null_terminated_string(str));
+}
+
+__device__ inline udf_string& udf_string::insert(cudf::size_type pos, cudf::string_view in)
+{
+  return insert(pos, in.data(), in.size_bytes());
+}
+
+__device__ inline udf_string& udf_string::insert(cudf::size_type pos,
+                                                 cudf::size_type count,
+                                                 cudf::char_utf8 chr)
+{
+  return replace(pos, 0, count, chr);
+}
+
+__device__ inline udf_string udf_string::substr(cudf::size_type pos, cudf::size_type count) const
+{
+  if (pos < 0) { return udf_string{"", 0}; }
+  auto const start_pos = byte_offset(pos);
+  if (start_pos >= m_bytes) { return udf_string{"", 0}; }
+  auto const end_pos = count < 0 ? m_bytes : std::min(byte_offset(pos + count), m_bytes);
+  return udf_string{data() + start_pos, end_pos - start_pos};
+}
+
+// utility for replace()
+__device__ void udf_string::shift_bytes(cudf::size_type start_pos,
+                                        cudf::size_type end_pos,
+                                        cudf::size_type nbytes)
+{
+  if (nbytes < m_bytes) {
+    // shift bytes to the left [...wxyz] -> [wxyzxyz]
+    auto src = end_pos;
+    auto tgt = start_pos;
+    while (tgt < nbytes) {
+      m_data[tgt++] = m_data[src++];
+    }
+  } else if (nbytes > m_bytes) {
+    // shift bytes to the right [abcd...] -> [abcabcd]
+    auto src = m_bytes;
+    auto tgt = nbytes;
+    while (src > end_pos) {
+      m_data[--tgt] = m_data[--src];
+    }
+  }
+}
+
+__device__ inline udf_string& udf_string::replace(cudf::size_type pos,
+                                                  cudf::size_type count,
+                                                  char const* str,
+                                                  cudf::size_type in_bytes)
+{
+  if (pos < 0 || in_bytes < 0) { return *this; }
+  auto const start_pos = byte_offset(pos);
+  if (start_pos > m_bytes) { return *this; }
+  auto const end_pos = count < 0 ? m_bytes : std::min(byte_offset(pos + count), m_bytes);
+
+  // compute new size; also allocate if there is no buffer yet (terminator is written below)
+  auto const nbytes = m_bytes + in_bytes - (end_pos - start_pos);
+  if (nbytes > m_capacity || m_data == nullptr) { reallocate(2 * nbytes); }
+
+  // move bytes -- make room for replacement
+  shift_bytes(start_pos + in_bytes, end_pos, nbytes);
+
+  // insert the replacement
+  memcpy(m_data + start_pos, str, in_bytes);
+
+  m_bytes         = nbytes;
+  m_data[m_bytes] = '\0';
+  return *this;
+}
+
+__device__ inline udf_string& udf_string::replace(cudf::size_type pos,
+                                                  cudf::size_type count,
+                                                  char const* str)
+{
+  return replace(pos, count, str, detail::bytes_in_null_terminated_string(str));
+}
+
+__device__ inline udf_string& udf_string::replace(cudf::size_type pos,
+                                                  cudf::size_type count,
+                                                  cudf::string_view in)
+{
+  return replace(pos, count, in.data(), in.size_bytes());
+}
+
+__device__ inline udf_string& udf_string::replace(cudf::size_type pos,
+                                                  cudf::size_type count,
+                                                  cudf::size_type chr_count,
+                                                  cudf::char_utf8 chr)
+{
+  auto d_str = udf_string(chr_count, chr);
+  return replace(pos, count, d_str);
+}
+
+__device__ udf_string& udf_string::erase(cudf::size_type pos, cudf::size_type count)
+{
+  return replace(pos, count, nullptr, 0);
+}
+
+__device__ inline cudf::size_type udf_string::char_offset(cudf::size_type byte_pos) const
+{
+  return cudf::strings::detail::characters_in_string(data(), byte_pos);
+}
+
+}  // namespace udf
+}  // namespace strings
+}  // namespace cudf
diff --git a/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.hpp b/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.hpp
new file mode 100644
index 00000000000..2bbda357cee
--- /dev/null
+++ b/python/strings_udf/cpp/include/cudf/strings/udf/udf_string.hpp
@@ -0,0 +1,550 @@
+/*
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <cudf/strings/string_view.hpp>
+
+#include <cuda_runtime.h>
+
+// This header contains all class and function declarations so that it
+// can be included in a .cpp file which only has declaration requirements
+// (i.e. sizeof, conditionally-comparable, explicit conversions, etc).
+// The definitions are coded in udf_string.cuh which is to be included
+// in .cu files that use this class in kernel calls.
+
+namespace cudf {
+namespace strings {
+namespace udf {
+
+/**
+ * @brief Device string class for use with user-defined functions
+ *
+ * This class manages a device buffer of UTF-8 encoded characters
+ * for string manipulation in a device kernel.
+ *
+ * Its methods and behavior are modelled after std::string but
+ * with special consideration for UTF-8 encoded strings and for
+ * use within a cuDF UDF.
+ */
+class udf_string {
+ public:
+  /**
+   * @brief Represents unknown character position or length
+   */
+  static constexpr cudf::size_type npos = static_cast<cudf::size_type>(-1);
+
+  /**
+   * @brief Cast to cudf::string_view operator
+   */
+  __device__ operator cudf::string_view() const { return cudf::string_view(m_data, m_bytes); }
+
+  /**
+   * @brief Create an empty string.
+   */
+  udf_string() = default;
+
+  /**
+   * @brief Create a string using existing device memory
+   *
+   * The given memory is copied into the instance returned.
+   *
+   * @param data Device pointer to UTF-8 encoded string
+   * @param bytes Number of bytes in `data`
+   */
+  __device__ udf_string(char const* data, cudf::size_type bytes);
+
+  /**
+   * @brief Create a string object from a null-terminated character array
+   *
+   * The given memory is copied into the instance returned.
+   *
+   * @param data Device pointer to UTF-8 encoded null-terminated
+   *             character array.
+   */
+  __device__ udf_string(char const* data);
+
+  /**
+   * @brief Create a string object from a cudf::string_view
+   *
+   * The input string data is copied into the instance returned.
+   *
+   * @param str String to copy
+   */
+  __device__ udf_string(cudf::string_view str);
+
+  /**
+   * @brief Create a string object with `count` copies of character `chr`
+   *
+   * @param count Number of times to copy `chr`
+   * @param chr Character from which to create the string
+   */
+  __device__ udf_string(cudf::size_type count, cudf::char_utf8 chr);
+
+  /**
+   * @brief Create a string object from another instance
+   *
+   * The string data is copied from the `src` into the instance returned.
+   *
+   * @param src String to copy
+   */
+  __device__ udf_string(udf_string const& src);
+
+  /**
+   * @brief Move a string object from an rvalue reference
+   *
+   * The string data is moved from `src` into the instance returned.
+   * The `src` will have no content.
+   *
+   * @param src String to move
+   */
+  __device__ udf_string(udf_string&& src) noexcept;
+
+  __device__ ~udf_string();
+
+  __device__ udf_string& operator=(udf_string const&);
+  __device__ udf_string& operator=(udf_string&&) noexcept;
+  __device__ udf_string& operator=(cudf::string_view const);
+  __device__ udf_string& operator=(char const*);
+
+  /**
+   * @brief Return the number of bytes in this string
+   */
+  __device__ cudf::size_type size_bytes() const noexcept;
+
+  /**
+   * @brief Return the number of characters in this string
+   */
+  __device__ cudf::size_type length() const noexcept;
+
+  /**
+   * @brief Return the maximum number of bytes a udf_string can hold
+   */
+  __device__ constexpr cudf::size_type max_size() const noexcept;
+
+  /**
+   * @brief Return the internal pointer to the character array for this object
+   */
+  __device__ char* data() noexcept;
+  __device__ char const* data() const noexcept;
+
+  /**
+   * @brief Returns true if there are no characters in this string
+   */
+  __device__ bool is_empty() const noexcept;
+
+  /**
+   * @brief Returns an iterator that can be used to navigate through
+   *        the UTF-8 characters in this string
+   *
+   * This returns a `cudf::string_view::const_iterator` which is read-only.
+   */
+  __device__ cudf::string_view::const_iterator begin() const noexcept;
+  __device__ cudf::string_view::const_iterator end() const noexcept;
+
+  /**
+   * @brief Returns the character at the specified position
+   *
+   * This will return 0 if `pos >= length()`.
+   *
+   * @param pos Index position of character to return
+   * @return Character at position `pos`
+   */
+  __device__ cudf::char_utf8 at(cudf::size_type pos) const;
+
+  /**
+   * @brief Returns the character at the specified index
+   *
+   * This will return 0 if `pos >= length()`.
+   * Note this is read-only. Use replace() to modify a character.
+   *
+   * @param pos Index position of character to return
+   * @return Character at position `pos`
+   */
+  __device__ cudf::char_utf8 operator[](cudf::size_type pos) const;
+
+  /**
+   * @brief Return the byte offset for a given character position
+   *
+   * The byte offset for the character at `pos` such that
+   * `data() + byte_offset(pos)` points to the memory location
+   * of the character at position `pos`.
+   *
+   * The behavior is undefined if `pos < 0 or pos >= length()`
+   *
+   * @param pos Index position of character to return byte offset.
+   * @return Byte offset for character at `pos`
+   */
+  __device__ cudf::size_type byte_offset(cudf::size_type pos) const;
+
+  /**
+   * @brief Comparing target string with this string
+   *
+   * @param str Target string to compare with this string
+   * @return 0  If they compare equal
+   *         <0 Either the value of the first character of this string that does
+   *            not match is ordered before the corresponding character in `str`,
+   *            or all compared characters match but the `str` string is longer.
+   *         >0 Either the value of the first character of this string that does
+   *            not match is ordered after the corresponding character in `str`,
+   *            or all compared characters match but the `str` string is shorter.
+   */
+  __device__ int compare(cudf::string_view str) const noexcept;
+
+  /**
+   * @brief Comparing target character array with this string
+   *
+   * @param str Target array of UTF-8 characters.
+   * @param bytes Number of bytes in `str`.
+   * @return 0  If they compare equal
+   *         <0 Either the value of the first character of this string that does
+   *            not match is ordered before the corresponding character in `str`,
+   *            or all compared characters match but `bytes > size_bytes()`.
+   *         >0 Either the value of the first character of this string that does
+   *            not match is ordered after the corresponding character in `str`,
+   *            or all compared characters match but `bytes < size_bytes()`.
+   */
+  __device__ int compare(char const* str, cudf::size_type bytes) const;
+
+  /**
+   * @brief Returns true if `rhs` matches this string exactly
+   */
+  __device__ bool operator==(cudf::string_view rhs) const noexcept;
+
+  /**
+   * @brief Returns true if `rhs` does not match this string
+   */
+  __device__ bool operator!=(cudf::string_view rhs) const noexcept;
+
+  /**
+   * @brief Returns true if this string is ordered before `rhs`
+   */
+  __device__ bool operator<(cudf::string_view rhs) const noexcept;
+
+  /**
+   * @brief Returns true if `rhs` is ordered before this string
+   */
+  __device__ bool operator>(cudf::string_view rhs) const noexcept;
+
+  /**
+   * @brief Returns true if this string matches or is ordered before `rhs`
+   */
+  __device__ bool operator<=(cudf::string_view rhs) const noexcept;
+
+  /**
+   * @brief Returns true if `rhs` matches or is ordered before this string
+   */
+  __device__ bool operator>=(cudf::string_view rhs) const noexcept;
+
+  /**
+   * @brief Remove all bytes from this string
+   *
+   * All pointers, references, and iterators are invalidated.
+   */
+  __device__ void clear() noexcept;
+
+  /**
+   * @brief Resizes string to contain `count` bytes
+   *
+   * If `count > size_bytes()` then zero-padding is added.
+   * If `count < size_bytes()` then the string is truncated to size `count`.
+   *
+   * All pointers, references, and iterators may be invalidated.
+   *
+   * The behavior is undefined if `count > max_size()`
+   *
+   * @param count Size in bytes of this string.
+   */
+  __device__ void resize(cudf::size_type count);
+
+  /**
+   * @brief Reserve `count` bytes in this string
+   *
+   * If `count > capacity()`, new memory is allocated and `capacity()` will
+   * be greater than or equal to `count`.
+   * There is no effect if `count <= capacity()`.
+   *
+   * @param count Total number of bytes to reserve for this string
+   */
+  __device__ void reserve(cudf::size_type count);
+
+  /**
+   * @brief Returns the number of bytes that the string has allocated
+   */
+  __device__ cudf::size_type capacity() const noexcept;
+
+  /**
+   * @brief Reduces internal allocation to just `size_bytes()`
+   *
+   * All pointers, references, and iterators may be invalidated.
+   */
+  __device__ void shrink_to_fit();
+
+  /**
+   * @brief Moves the contents of `str` into this string instance
+   *
+   * On return, the `str` will have no contents.
+   *
+   * @param str String to move
+   * @return This string with new contents
+   */
+  __device__ udf_string& assign(udf_string&& str) noexcept;
+
+  /**
+   * @brief Replaces the contents of this string with contents of `str`
+   *
+   * @param str String to copy
+   * @return This string with new contents
+   */
+  __device__ udf_string& assign(cudf::string_view str);
+
+  /**
+   * @brief Replaces the contents of this string with contents of `str`
+   *
+   * @param str Null-terminated UTF-8 character array
+   * @return This string with new contents
+   */
+  __device__ udf_string& assign(char const* str);
+
+  /**
+   * @brief Replaces the contents of this string with contents of `str`
+   *
+   * @param str UTF-8 character array
+   * @param bytes Number of bytes to copy from `str`
+   * @return This string with new contents
+   */
+  __device__ udf_string& assign(char const* str, cudf::size_type bytes);
+
+  /**
+   * @brief Append a string to the end of this string
+   *
+   * @param str String to append
+   * @return This string with the appended argument
+   */
+  __device__ udf_string& operator+=(cudf::string_view str);
+
+  /**
+   * @brief Append a character to the end of this string
+   *
+   * @param chr Character to append
+   * @return This string with the appended argument
+   */
+  __device__ udf_string& operator+=(cudf::char_utf8 chr);
+
+  /**
+   * @brief Append a null-terminated device memory character array
+   * to the end of this string
+   *
+   * @param str String to append
+   * @return This string with the appended argument
+   */
+  __device__ udf_string& operator+=(char const* str);
+
+  /**
+   * @brief Append a null-terminated character array to the end of this string
+   *
+   * @param str String to append
+   * @return This string with the appended argument
+   */
+  __device__ udf_string& append(char const* str);
+
+  /**
+   * @brief Append a character array to the end of this string
+   *
+   * @param str Character array to append
+   * @param bytes Number of bytes from `str` to append.
+   * @return This string with the appended argument
+   */
+  __device__ udf_string& append(char const* str, cudf::size_type bytes);
+
+  /**
+   * @brief Append a string to the end of this string
+   *
+   * @param str String to append
+   * @return This string with the appended argument
+   */
+  __device__ udf_string& append(cudf::string_view str);
+
+  /**
+   * @brief Append a character to the end of this string
+   * a specified number of times.
+   *
+   * @param chr Character to append
+   * @param count Number of times to append `chr`
+   * @return This string with the appended character(s)
+   */
+  __device__ udf_string& append(cudf::char_utf8 chr, cudf::size_type count = 1);
+
+  /**
+   * @brief Insert a string into the character position specified
+   *
+   * There is no effect if `pos < 0 or pos > length()`.
+   *
+   * @param pos Character position to begin insert
+   * @param str String to insert into this one
+   * @return This string with the inserted argument
+   */
+  __device__ udf_string& insert(cudf::size_type pos, cudf::string_view str);
+
+  /**
+   * @brief Insert a null-terminated character array into the character position specified
+   *
+   * There is no effect if `pos < 0 or pos > length()`.
+   *
+   * @param pos Character position to begin insert
+   * @param data Null-terminated character array to insert
+   * @return This string with the inserted argument
+   */
+  __device__ udf_string& insert(cudf::size_type pos, char const* data);
+
+  /**
+   * @brief Insert a character array into the character position specified
+   *
+   * There is no effect if `pos < 0 or pos > length()`.
+   *
+   * @param pos Character position to begin insert
+   * @param data Character array to insert
+   * @param bytes Number of bytes from `data` to insert
+   * @return This string with the inserted argument
+   */
+  __device__ udf_string& insert(cudf::size_type pos, char const* data, cudf::size_type bytes);
+
+  /**
+   * @brief Insert a character one or more times into the character position specified
+   *
+   * There is no effect if `pos < 0 or pos > length()`.
+   *
+   * @param pos Character position to begin insert
+   * @param count Number of times to insert `chr`
+   * @param chr Character to insert
+   * @return This string with the inserted argument
+   */
+  __device__ udf_string& insert(cudf::size_type pos, cudf::size_type count, cudf::char_utf8 chr);
+
+  /**
+   * @brief Returns a substring of this string
+   *
+   * An empty string is returned if `pos < 0 or pos >= length()`.
+   *
+   * @param pos Character position to start the substring
+   * @param count Number of characters for the substring;
+   *              This can be greater than the number of available characters.
+   *              Default npos returns characters in range `[pos, length())`.
+   * @return New string with the specified characters
+   */
+  __device__ udf_string substr(cudf::size_type pos, cudf::size_type count = npos) const;
+
+  /**
+   * @brief Replace a range of characters with a given string
+   *
+   * Replaces characters in range `[pos, pos + count)` with `str`.
+   * There is no effect if `pos < 0 or pos > length()`.
+   *
+   * If `count==0` then `str` is inserted starting at `pos`.
+   * If `count==npos` then the replacement range is `[pos,length())`.
+   *
+   * @param pos Position of first character to replace
+   * @param count Number of characters to replace
+   * @param str String to replace the given range
+   * @return This string modified with the replacement
+   */
+  __device__ udf_string& replace(cudf::size_type pos, cudf::size_type count, cudf::string_view str);
+
+  /**
+   * @brief Replace a range of characters with a null-terminated character array
+   *
+   * Replaces characters in range `[pos, pos + count)` with `data`.
+   * There is no effect if `pos < 0 or pos > length()`.
+   *
+   * If `count==0` then `data` is inserted starting at `pos`.
+   * If `count==npos` then the replacement range is `[pos,length())`.
+   *
+   * @param pos Position of first character to replace
+   * @param count Number of characters to replace
+   * @param data Null-terminated character array to replace the given range
+   * @return This string modified with the replacement
+   */
+  __device__ udf_string& replace(cudf::size_type pos, cudf::size_type count, char const* data);
+
+  /**
+   * @brief Replace a range of characters with a given character array
+   *
+   * Replaces characters in range `[pos, pos + count)` with `[data, data + bytes)`.
+   * There is no effect if `pos < 0 or pos > length()`.
+   *
+   * If `count==0` then `data` is inserted starting at `pos`.
+   * If `count==npos` then the replacement range is `[pos,length())`.
+   *
+   * @param pos Position of first character to replace
+   * @param count Number of characters to replace
+   * @param data String to replace the given range
+   * @param bytes Number of bytes from data to use for replacement
+   * @return This string modified with the replacement
+   */
+  __device__ udf_string& replace(cudf::size_type pos,
+                                 cudf::size_type count,
+                                 char const* data,
+                                 cudf::size_type bytes);
+
+  /**
+   * @brief Replace a range of characters with a character one or more times
+   *
+   * Replaces characters in range `[pos, pos + count)` with `chr` `chr_count` times.
+   * There is no effect if `pos < 0 or pos > length()`.
+   *
+   * If `count==0` then `chr` is inserted starting at `pos`.
+   * If `count==npos` then the replacement range is `[pos,length())`.
+   *
+   * @param pos Position of first character to replace
+   * @param count Number of characters to replace
+   * @param chr_count Number of times `chr` will be repeated
+   * @param chr Character to use for replacement
+   * @return This string modified with the replacement
+   */
+  __device__ udf_string& replace(cudf::size_type pos,
+                                 cudf::size_type count,
+                                 cudf::size_type chr_count,
+                                 cudf::char_utf8 chr);
+
+  /**
+   * @brief Removes specified characters from this string
+   *
+   * Removes `min(count, length() - pos)` characters starting at `pos`.
+   * There is no effect if `pos < 0 or pos >= length()`.
+   *
+   * @param pos Character position to begin removal
+   * @param count Number of characters to remove starting at `pos`
+   * @return This string with the characters removed
+   */
+  __device__ udf_string& erase(cudf::size_type pos, cudf::size_type count = npos);
+
+ private:
+  char* m_data{};
+  cudf::size_type m_bytes{};
+  cudf::size_type m_capacity{};
+
+  // utilities
+  __device__ char* allocate(cudf::size_type bytes);
+  __device__ void deallocate(char* data);
+  __device__ void reallocate(cudf::size_type bytes);
+  __device__ cudf::size_type char_offset(cudf::size_type byte_pos) const;
+  __device__ void shift_bytes(cudf::size_type start_pos,
+                              cudf::size_type end_pos,
+                              cudf::size_type nbytes);
+};
+
+}  // namespace udf
+}  // namespace strings
+}  // namespace cudf
diff --git a/python/strings_udf/cpp/src/strings/udf/udf_apis.cu b/python/strings_udf/cpp/src/strings/udf/udf_apis.cu
index 89952dadb6c..7927740fd49 100644
--- a/python/strings_udf/cpp/src/strings/udf/udf_apis.cu
+++ b/python/strings_udf/cpp/src/strings/udf/udf_apis.cu
@@ -15,18 +15,44 @@
  */
 
 #include <cudf/strings/udf/udf_apis.hpp>
+#include <cudf/strings/udf/udf_string.cuh>
 
 #include <cudf/column/column_factories.hpp>
 #include <cudf/strings/detail/utilities.hpp>
 #include <cudf/strings/string_view.cuh>
+#include <cudf/utilities/default_stream.hpp>
 
 #include <rmm/device_uvector.hpp>
+#include <rmm/exec_policy.hpp>
+
+#include <thrust/iterator/counting_iterator.h>
+#include <thrust/transform.h>
 
 namespace cudf {
 namespace strings {
 namespace udf {
 namespace detail {
+namespace {
+
+/**
+ * @brief Functor wraps string_view objects around udf_string objects
+ *
+ * No string data is copied.
+ */
+struct udf_string_to_string_view_transform_fn {
+  __device__ cudf::string_view operator()(cudf::strings::udf::udf_string const& dstr)
+  {
+    return cudf::string_view{dstr.data(), dstr.size_bytes()};
+  }
+};
 
+}  // namespace
+
+/**
+ * @copydoc to_string_view_array
+ *
+ * @param stream CUDA stream used for allocating/copying device memory and launching kernels
+ */
 std::unique_ptr<rmm::device_buffer> to_string_view_array(cudf::column_view const input,
                                                          rmm::cuda_stream_view stream)
 {
@@ -36,13 +62,61 @@ std::unique_ptr<rmm::device_buffer> to_string_view_array(cudf::column_view const
                 .release()));
 }
 
+/**
+ * @copydoc column_from_udf_string_array
+ *
+ * @param stream CUDA stream used for allocating/copying device memory and launching kernels
+ */
+std::unique_ptr<cudf::column> column_from_udf_string_array(udf_string* d_strings,
+                                                           cudf::size_type size,
+                                                           rmm::cuda_stream_view stream)
+{
+  // create string_views of the udf_strings
+  auto indices = rmm::device_uvector<cudf::string_view>(size, stream);
+  thrust::transform(rmm::exec_policy(stream),
+                    d_strings,
+                    d_strings + size,
+                    indices.data(),
+                    udf_string_to_string_view_transform_fn{});
+
+  return cudf::make_strings_column(indices, cudf::string_view(nullptr, 0), stream);
+}
+
+/**
+ * @copydoc free_udf_string_array
+ *
+ * @param stream CUDA stream used for allocating/copying device memory and launching kernels
+ */
+void free_udf_string_array(cudf::strings::udf::udf_string* d_strings,
+                           cudf::size_type size,
+                           rmm::cuda_stream_view stream)
+{
+  thrust::for_each_n(rmm::exec_policy(stream),
+                     thrust::make_counting_iterator(0),
+                     size,
+                     [d_strings] __device__(auto idx) { d_strings[idx].clear(); });
+}
+
 }  // namespace detail
 
+// external APIs
+
 std::unique_ptr<rmm::device_buffer> to_string_view_array(cudf::column_view const input)
 {
   return detail::to_string_view_array(input, cudf::get_default_stream());
 }
 
+std::unique_ptr<cudf::column> column_from_udf_string_array(udf_string* d_strings,
+                                                           cudf::size_type size)
+{
+  return detail::column_from_udf_string_array(d_strings, size, cudf::get_default_stream());
+}
+
+void free_udf_string_array(udf_string* d_strings, cudf::size_type size)
+{
+  detail::free_udf_string_array(d_strings, size, cudf::get_default_stream());
+}
+
 }  // namespace udf
 }  // namespace strings
 }  // namespace cudf

From d6a9e4a58593d0b13308070900bc94894b3f7e41 Mon Sep 17 00:00:00 2001
From: "Mads R. B. Kristensen" <madsbk@gmail.com>
Date: Wed, 2 Nov 2022 17:13:52 +0100
Subject: [PATCH 112/202] Rollback of `DeviceBufferLike` (#12009)

This PR replaces `DeviceBufferLike` with `Buffer` and clears the way for a spillable sub-class of `Buffer`.

#### Context
The introduction of the [`DeviceBufferLike`](https://github.com/rapidsai/cudf/pull/11447) protocol was motivated by [the spilling work](https://github.com/rapidsai/cudf/pull/11553), which we initially thought would have to be implemented in Cython. However, it can be done in pure Python, which makes `DeviceBufferLike` an unneeded complexity.

#### Review notes

- In order to introduce a spillable-buffer in the future, we still use a factory function, `as_buffer()`, to create Buffers.
- `buffer.py` is moved into the submodule `core.buffer` to ease organization when adding the spillable-buffer and spilling manager.

#### Breaking
This PR breaks external use of `Buffer`, e.g. `Buffer.__init__` now raises an exception and the `"constructor-kwargs"` header from #4164 has been removed.
Submitted a PR to fix this in cuml: https://github.com/rapidsai/cuml/pull/4965

##

Authors:
  - Mads R. B. Kristensen (https://github.com/madsbk)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Ashwin Srinath (https://github.com/shwina)

URL: https://github.com/rapidsai/cudf/pull/12009
---
 python/cudf/cudf/_lib/column.pyi              |  28 +-
 python/cudf/cudf/_lib/column.pyx              |  44 +--
 python/cudf/cudf/_lib/concat.pyx              |   4 +-
 python/cudf/cudf/_lib/copying.pyx             |   4 +-
 python/cudf/cudf/_lib/null_mask.pyx           |  10 +-
 python/cudf/cudf/_lib/transform.pyx           |  12 +-
 python/cudf/cudf/_lib/utils.pyx               |   4 +-
 python/cudf/cudf/core/abc.py                  |  30 +-
 python/cudf/cudf/core/buffer.py               | 325 ------------------
 python/cudf/cudf/core/buffer/__init__.py      |   4 +
 python/cudf/cudf/core/buffer/buffer.py        | 319 +++++++++++++++++
 python/cudf/cudf/core/buffer/utils.py         |  67 ++++
 python/cudf/cudf/core/column/categorical.py   |   8 +-
 python/cudf/cudf/core/column/column.py        |  52 ++-
 python/cudf/cudf/core/column/datetime.py      |  31 +-
 python/cudf/cudf/core/column/decimal.py       |   6 +-
 python/cudf/cudf/core/column/numerical.py     |  34 +-
 python/cudf/cudf/core/column/string.py        |   6 +-
 python/cudf/cudf/core/column/timedelta.py     |  14 +-
 python/cudf/cudf/core/df_protocol.py          |  22 +-
 python/cudf/cudf/core/dtypes.py               |   4 +-
 python/cudf/cudf/core/index.py                |   2 +-
 python/cudf/cudf/core/series.py               |   2 -
 python/cudf/cudf/tests/test_buffer.py         |  40 +--
 python/cudf/cudf/tests/test_column.py         |   2 +-
 .../cudf/tests/test_cuda_array_interface.py   |   4 +-
 python/cudf/cudf/tests/test_df_protocol.py    |   4 +-
 python/cudf/cudf/tests/test_pickling.py       |   4 +-
 python/cudf/cudf/tests/test_testing.py        |   2 +-
 python/cudf/cudf/utils/utils.py               |   6 +-
 .../strings_udf/_lib/cudf_jit_udf.pyx         |   4 +-
 31 files changed, 559 insertions(+), 539 deletions(-)
 delete mode 100644 python/cudf/cudf/core/buffer.py
 create mode 100644 python/cudf/cudf/core/buffer/__init__.py
 create mode 100644 python/cudf/cudf/core/buffer/buffer.py
 create mode 100644 python/cudf/cudf/core/buffer/utils.py

diff --git a/python/cudf/cudf/_lib/column.pyi b/python/cudf/cudf/_lib/column.pyi
index fd9aab038d4..c38c560b982 100644
--- a/python/cudf/cudf/_lib/column.pyi
+++ b/python/cudf/cudf/_lib/column.pyi
@@ -5,16 +5,16 @@ from __future__ import annotations
 from typing import Dict, Optional, Tuple, TypeVar
 
 from cudf._typing import Dtype, DtypeObj, ScalarLike
-from cudf.core.buffer import DeviceBufferLike
+from cudf.core.buffer import Buffer
 from cudf.core.column import ColumnBase
 
 T = TypeVar("T")
 
 class Column:
-    _data: Optional[DeviceBufferLike]
-    _mask: Optional[DeviceBufferLike]
-    _base_data: Optional[DeviceBufferLike]
-    _base_mask: Optional[DeviceBufferLike]
+    _data: Optional[Buffer]
+    _mask: Optional[Buffer]
+    _base_data: Optional[Buffer]
+    _base_mask: Optional[Buffer]
     _dtype: DtypeObj
     _size: int
     _offset: int
@@ -25,10 +25,10 @@ class Column:
 
     def __init__(
         self,
-        data: Optional[DeviceBufferLike],
+        data: Optional[Buffer],
         size: int,
         dtype: Dtype,
-        mask: Optional[DeviceBufferLike] = None,
+        mask: Optional[Buffer] = None,
         offset: int = None,
         null_count: int = None,
         children: Tuple[ColumnBase, ...] = (),
@@ -40,27 +40,27 @@ class Column:
     @property
     def size(self) -> int: ...
     @property
-    def base_data(self) -> Optional[DeviceBufferLike]: ...
+    def base_data(self) -> Optional[Buffer]: ...
     @property
     def base_data_ptr(self) -> int: ...
     @property
-    def data(self) -> Optional[DeviceBufferLike]: ...
+    def data(self) -> Optional[Buffer]: ...
     @property
     def data_ptr(self) -> int: ...
-    def set_base_data(self, value: DeviceBufferLike) -> None: ...
+    def set_base_data(self, value: Buffer) -> None: ...
     @property
     def nullable(self) -> bool: ...
     def has_nulls(self, include_nan: bool = False) -> bool: ...
     @property
-    def base_mask(self) -> Optional[DeviceBufferLike]: ...
+    def base_mask(self) -> Optional[Buffer]: ...
     @property
     def base_mask_ptr(self) -> int: ...
     @property
-    def mask(self) -> Optional[DeviceBufferLike]: ...
+    def mask(self) -> Optional[Buffer]: ...
     @property
     def mask_ptr(self) -> int: ...
-    def set_base_mask(self, value: Optional[DeviceBufferLike]) -> None: ...
-    def set_mask(self: T, value: Optional[DeviceBufferLike]) -> T: ...
+    def set_base_mask(self, value: Optional[Buffer]) -> None: ...
+    def set_mask(self: T, value: Optional[Buffer]) -> T: ...
     @property
     def null_count(self) -> int: ...
     @property
diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx
index 1e7f0b175bc..918d786fb83 100644
--- a/python/cudf/cudf/_lib/column.pyx
+++ b/python/cudf/cudf/_lib/column.pyx
@@ -8,7 +8,7 @@ import rmm
 import cudf
 import cudf._lib as libcudf
 from cudf.api.types import is_categorical_dtype
-from cudf.core.buffer import Buffer, DeviceBufferLike, as_device_buffer_like
+from cudf.core.buffer import Buffer, as_buffer
 
 from cpython.buffer cimport PyObject_CheckBuffer
 from libc.stdint cimport uintptr_t
@@ -39,9 +39,9 @@ cdef class Column:
     A Column stores columnar data in device memory.
     A Column may be composed of:
 
-    * A *data* DeviceBufferLike
+    * A *data* Buffer
     * One or more (optional) *children* Columns
-    * An (optional) *mask* DeviceBufferLike representing the nullmask
+    * An (optional) *mask* Buffer representing the nullmask
 
     The *dtype* indicates the Column's element type.
     """
@@ -106,9 +106,9 @@ cdef class Column:
             return self.data.ptr
 
     def set_base_data(self, value):
-        if value is not None and not isinstance(value, DeviceBufferLike):
+        if value is not None and not isinstance(value, Buffer):
             raise TypeError(
-                "Expected a DeviceBufferLike or None for data, "
+                "Expected a Buffer or None for data, "
                 f"got {type(value).__name__}"
             )
 
@@ -155,9 +155,9 @@ cdef class Column:
         modify size or offset in any way, so the passed mask is expected to be
         compatible with the current offset.
         """
-        if value is not None and not isinstance(value, DeviceBufferLike):
+        if value is not None and not isinstance(value, Buffer):
             raise TypeError(
-                "Expected a DeviceBufferLike or None for mask, "
+                "Expected a Buffer or None for mask, "
                 f"got {type(value).__name__}"
             )
 
@@ -165,7 +165,7 @@ cdef class Column:
             required_size = bitmask_allocation_size_bytes(self.base_size)
             if value.size < required_size:
                 error_msg = (
-                    "The DeviceBufferLike for mask is smaller than expected, "
+                    "The Buffer for mask is smaller than expected, "
                     f"got {value.size} bytes, expected {required_size} bytes."
                 )
                 if self.offset > 0 or self.size < self.base_size:
@@ -210,30 +210,30 @@ cdef class Column:
                 if isinstance(value, Column):
                     value = value.data_array_view
                 value = cp.asarray(value).view('|u1')
-            mask = as_device_buffer_like(value)
+            mask = as_buffer(value)
             if mask.size < required_num_bytes:
                 raise ValueError(error_msg.format(str(value.size)))
             if mask.size < mask_size:
                 dbuf = rmm.DeviceBuffer(size=mask_size)
                 dbuf.copy_from_device(value)
-                mask = as_device_buffer_like(dbuf)
+                mask = as_buffer(dbuf)
         elif hasattr(value, "__array_interface__"):
             value = np.asarray(value).view("u1")[:mask_size]
             if value.size < required_num_bytes:
                 raise ValueError(error_msg.format(str(value.size)))
             dbuf = rmm.DeviceBuffer(size=mask_size)
             dbuf.copy_from_host(value)
-            mask = as_device_buffer_like(dbuf)
+            mask = as_buffer(dbuf)
         elif PyObject_CheckBuffer(value):
             value = np.asarray(value).view("u1")[:mask_size]
             if value.size < required_num_bytes:
                 raise ValueError(error_msg.format(str(value.size)))
             dbuf = rmm.DeviceBuffer(size=mask_size)
             dbuf.copy_from_host(value)
-            mask = as_device_buffer_like(dbuf)
+            mask = as_buffer(dbuf)
         else:
             raise TypeError(
-                "Expected a DeviceBufferLike object or None for mask, "
+                "Expected a Buffer object or None for mask, "
                 f"got {type(value).__name__}"
             )
 
@@ -432,11 +432,11 @@ cdef class Column:
         cdef column_contents contents = move(c_col.get()[0].release())
 
         data = DeviceBuffer.c_from_unique_ptr(move(contents.data))
-        data = as_device_buffer_like(data)
+        data = as_buffer(data)
 
         if null_count > 0:
             mask = DeviceBuffer.c_from_unique_ptr(move(contents.null_mask))
-            mask = as_device_buffer_like(mask)
+            mask = as_buffer(mask)
         else:
             mask = None
 
@@ -461,8 +461,8 @@ cdef class Column:
         Given a ``cudf::column_view``, constructs a ``cudf.Column`` from it,
         along with referencing an ``owner`` Python object that owns the memory
         lifetime. If ``owner`` is a ``cudf.Column``, we reach inside of it and
-        make the owner of each newly created ``DeviceBufferLike`` the
-        respective ``DeviceBufferLike`` from the ``owner`` ``cudf.Column``.
+        make the owner of each newly created ``Buffer`` the respective
+        ``Buffer`` from the ``owner`` ``cudf.Column``.
         If ``owner`` is ``None``, we allocate new memory for the resulting
         ``cudf.Column``.
         """
@@ -487,18 +487,18 @@ cdef class Column:
 
         if data_ptr:
             if data_owner is None:
-                data = as_device_buffer_like(
+                data = as_buffer(
                     rmm.DeviceBuffer(ptr=data_ptr,
                                      size=(size+offset) * dtype.itemsize)
                 )
             else:
-                data = Buffer(
+                data = as_buffer(
                     data=data_ptr,
                     size=(base_size) * dtype.itemsize,
                     owner=data_owner
                 )
         else:
-            data = as_device_buffer_like(
+            data = as_buffer(
                 rmm.DeviceBuffer(ptr=data_ptr, size=0)
             )
 
@@ -528,14 +528,14 @@ cdef class Column:
                     # result:
                     mask = None
                 else:
-                    mask = as_device_buffer_like(
+                    mask = as_buffer(
                         rmm.DeviceBuffer(
                             ptr=mask_ptr,
                             size=bitmask_allocation_size_bytes(size+offset)
                         )
                     )
             else:
-                mask = Buffer(
+                mask = as_buffer(
                     data=mask_ptr,
                     size=bitmask_allocation_size_bytes(base_size),
                     owner=mask_owner
diff --git a/python/cudf/cudf/_lib/concat.pyx b/python/cudf/cudf/_lib/concat.pyx
index ed858034032..75e2d3bfbdc 100644
--- a/python/cudf/cudf/_lib/concat.pyx
+++ b/python/cudf/cudf/_lib/concat.pyx
@@ -19,7 +19,7 @@ from cudf._lib.utils cimport (
     table_view_from_table,
 )
 
-from cudf.core.buffer import as_device_buffer_like
+from cudf.core.buffer import as_buffer
 
 from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer
 
@@ -31,7 +31,7 @@ cpdef concat_masks(object columns):
     with nogil:
         c_result = move(libcudf_concatenate_masks(c_views))
         c_unique_result = make_unique[device_buffer](move(c_result))
-    return as_device_buffer_like(
+    return as_buffer(
         DeviceBuffer.c_from_unique_ptr(move(c_unique_result))
     )
 
diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx
index 26ec2fbcdfc..d9a7a5b8754 100644
--- a/python/cudf/cudf/_lib/copying.pyx
+++ b/python/cudf/cudf/_lib/copying.pyx
@@ -12,7 +12,7 @@ from libcpp.vector cimport vector
 from rmm._lib.device_buffer cimport DeviceBuffer
 
 import cudf
-from cudf.core.buffer import Buffer
+from cudf.core.buffer import Buffer, as_buffer
 
 from cudf._lib.column cimport Column
 
@@ -721,7 +721,7 @@ cdef class _CPackedColumns:
         header = {}
         frames = []
 
-        gpu_data = Buffer(
+        gpu_data = as_buffer(
             data=self.gpu_data_ptr,
             size=self.gpu_data_size,
             owner=self
diff --git a/python/cudf/cudf/_lib/null_mask.pyx b/python/cudf/cudf/_lib/null_mask.pyx
index 976fe0e78fc..61988019c70 100644
--- a/python/cudf/cudf/_lib/null_mask.pyx
+++ b/python/cudf/cudf/_lib/null_mask.pyx
@@ -22,7 +22,7 @@ from cudf._lib.cpp.table.table_view cimport table_view
 from cudf._lib.cpp.types cimport mask_state, size_type
 from cudf._lib.utils cimport table_view_from_columns
 
-from cudf.core.buffer import as_device_buffer_like
+from cudf.core.buffer import as_buffer
 
 
 class MaskState(Enum):
@@ -52,7 +52,7 @@ def copy_bitmask(Column col):
         up_db = make_unique[device_buffer](move(db))
 
     rmm_db = DeviceBuffer.c_from_unique_ptr(move(up_db))
-    buf = as_device_buffer_like(rmm_db)
+    buf = as_buffer(rmm_db)
     return buf
 
 
@@ -98,7 +98,7 @@ def create_null_mask(size_type size, state=MaskState.UNINITIALIZED):
         up_db = make_unique[device_buffer](move(db))
 
     rmm_db = DeviceBuffer.c_from_unique_ptr(move(up_db))
-    buf = as_device_buffer_like(rmm_db)
+    buf = as_buffer(rmm_db)
     return buf
 
 
@@ -110,7 +110,7 @@ def bitmask_and(columns: list):
         c_result = move(cpp_bitmask_and(c_view))
         up_db = make_unique[device_buffer](move(c_result.first))
     dbuf = DeviceBuffer.c_from_unique_ptr(move(up_db))
-    buf = as_device_buffer_like(dbuf)
+    buf = as_buffer(dbuf)
     return buf, c_result.second
 
 
@@ -122,5 +122,5 @@ def bitmask_or(columns: list):
         c_result = move(cpp_bitmask_or(c_view))
         up_db = make_unique[device_buffer](move(c_result.first))
     dbuf = DeviceBuffer.c_from_unique_ptr(move(up_db))
-    buf = as_device_buffer_like(dbuf)
+    buf = as_buffer(dbuf)
     return buf, c_result.second
diff --git a/python/cudf/cudf/_lib/transform.pyx b/python/cudf/cudf/_lib/transform.pyx
index e1612855dae..b95bce0db58 100644
--- a/python/cudf/cudf/_lib/transform.pyx
+++ b/python/cudf/cudf/_lib/transform.pyx
@@ -5,7 +5,7 @@ from numba.np import numpy_support
 import cudf
 from cudf._lib.types import SUPPORTED_NUMPY_TO_LIBCUDF_TYPES
 from cudf.core._internals.expressions import parse_expression
-from cudf.core.buffer import as_device_buffer_like
+from cudf.core.buffer import as_buffer
 from cudf.utils import cudautils
 
 from cython.operator cimport dereference
@@ -37,7 +37,7 @@ from cudf._lib.utils cimport (
 def bools_to_mask(Column col):
     """
     Given an int8 (boolean) column, compress the data from booleans to bits and
-    return a DeviceBufferLike
+    return a Buffer
     """
     cdef column_view col_view = col.view()
     cdef pair[unique_ptr[device_buffer], size_type] cpp_out
@@ -48,7 +48,7 @@ def bools_to_mask(Column col):
         up_db = move(cpp_out.first)
 
     rmm_db = DeviceBuffer.c_from_unique_ptr(move(up_db))
-    buf = as_device_buffer_like(rmm_db)
+    buf = as_buffer(rmm_db)
     return buf
 
 
@@ -57,9 +57,9 @@ def mask_to_bools(object mask_buffer, size_type begin_bit, size_type end_bit):
     Given a mask buffer, returns a boolean column representng bit 0 -> False
     and 1 -> True within range of [begin_bit, end_bit),
     """
-    if not isinstance(mask_buffer, cudf.core.buffer.DeviceBufferLike):
+    if not isinstance(mask_buffer, cudf.core.buffer.Buffer):
         raise TypeError("mask_buffer is not an instance of "
-                        "cudf.core.buffer.DeviceBufferLike")
+                        "cudf.core.buffer.Buffer")
     cdef bitmask_type* bit_mask = <bitmask_type*><uintptr_t>(mask_buffer.ptr)
 
     cdef unique_ptr[column] result
@@ -84,7 +84,7 @@ def nans_to_nulls(Column input):
         return None
 
     buffer = DeviceBuffer.c_from_unique_ptr(move(c_buffer))
-    buffer = as_device_buffer_like(buffer)
+    buffer = as_buffer(buffer)
     return buffer
 
 
diff --git a/python/cudf/cudf/_lib/utils.pyx b/python/cudf/cudf/_lib/utils.pyx
index 989d12caca0..5f4d3e17fbc 100644
--- a/python/cudf/cudf/_lib/utils.pyx
+++ b/python/cudf/cudf/_lib/utils.pyx
@@ -340,8 +340,8 @@ cdef data_from_table_view(
     along with referencing an ``owner`` Python object that owns the memory
     lifetime. If ``owner`` is a Frame we reach inside of it and
     reach inside of each ``cudf.Column`` to make the owner of each newly
-    created ``DeviceBufferLike`` underneath the ``cudf.Column`` objects of the
-    created Frame the respective ``DeviceBufferLike`` from the relevant
+    created ``Buffer`` underneath the ``cudf.Column`` objects of the
+    created Frame the respective ``Buffer`` from the relevant
     ``cudf.Column`` of the ``owner`` Frame
     """
     cdef size_type column_idx = 0
diff --git a/python/cudf/cudf/core/abc.py b/python/cudf/cudf/core/abc.py
index dcbf96313a7..1c8874a2abd 100644
--- a/python/cudf/cudf/core/abc.py
+++ b/python/cudf/cudf/core/abc.py
@@ -1,20 +1,10 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 """Common abstract base classes for cudf."""
 
-import sys
-
-import rmm
+import pickle
 
 import cudf
 
-if sys.version_info < (3, 8):
-    try:
-        import pickle5 as pickle
-    except ImportError:
-        import pickle  # type: ignore
-else:
-    import pickle  # type: ignore
-
 
 class Serializable:
     """A serializable object composed of device memory buffers.
@@ -90,14 +80,14 @@ def device_serialize(self):
         header : dict
             The metadata required to reconstruct the object.
         frames : list
-            The DeviceBufferLike or memoryview objects that the object
+            The Buffer or memoryview objects that the object
             should contain.
 
         :meta private:
         """
         header, frames = self.serialize()
         assert all(
-            isinstance(f, (cudf.core.buffer.DeviceBufferLike, memoryview))
+            isinstance(f, (cudf.core.buffer.Buffer, memoryview))
             for f in frames
         )
         header["type-serialized"] = pickle.dumps(type(self))
@@ -132,18 +122,10 @@ def device_deserialize(cls, header, frames):
         """
         typ = pickle.loads(header["type-serialized"])
         frames = [
-            cudf.core.buffer.as_device_buffer_like(f) if c else memoryview(f)
+            cudf.core.buffer.as_buffer(f) if c else memoryview(f)
             for c, f in zip(header["is-cuda"], frames)
         ]
-        assert all(
-            (type(f._owner) is rmm.DeviceBuffer)
-            if c
-            else (type(f) is memoryview)
-            for c, f in zip(header["is-cuda"], frames)
-        )
-        obj = typ.deserialize(header, frames)
-
-        return obj
+        return typ.deserialize(header, frames)
 
     def host_serialize(self):
         """Serialize data and metadata associated with host memory.
@@ -186,7 +168,7 @@ def host_deserialize(cls, header, frames):
         :meta private:
         """
         frames = [
-            rmm.DeviceBuffer.to_device(f) if c else f
+            cudf.core.buffer.as_buffer(f) if c else f
             for c, f in zip(header["is-cuda"], map(memoryview, frames))
         ]
         obj = cls.device_deserialize(header, frames)
diff --git a/python/cudf/cudf/core/buffer.py b/python/cudf/cudf/core/buffer.py
deleted file mode 100644
index 647e747e127..00000000000
--- a/python/cudf/cudf/core/buffer.py
+++ /dev/null
@@ -1,325 +0,0 @@
-# Copyright (c) 2020-2022, NVIDIA CORPORATION.
-
-from __future__ import annotations
-
-import math
-import pickle
-from typing import (
-    Any,
-    Dict,
-    List,
-    Mapping,
-    Protocol,
-    Sequence,
-    Tuple,
-    Union,
-    runtime_checkable,
-)
-
-import numpy as np
-
-import rmm
-
-import cudf
-from cudf.core.abc import Serializable
-from cudf.utils.string import format_bytes
-
-# Frame type for serialization and deserialization of `DeviceBufferLike`
-Frame = Union[memoryview, "DeviceBufferLike"]
-
-
-@runtime_checkable
-class DeviceBufferLike(Protocol):
-    def __getitem__(self, key: slice) -> DeviceBufferLike:
-        """Create a new view of the buffer."""
-
-    @property
-    def size(self) -> int:
-        """Size of the buffer in bytes."""
-
-    @property
-    def nbytes(self) -> int:
-        """Size of the buffer in bytes."""
-
-    @property
-    def ptr(self) -> int:
-        """Device pointer to the start of the buffer."""
-
-    @property
-    def owner(self) -> Any:
-        """Object owning the memory of the buffer."""
-
-    @property
-    def __cuda_array_interface__(self) -> Mapping:
-        """Implementation of the CUDA Array Interface."""
-
-    def memoryview(self) -> memoryview:
-        """Read-only access to the buffer through host memory."""
-
-    def serialize(self) -> Tuple[dict, List[Frame]]:
-        """Serialize the buffer into header and frames.
-
-        The frames can be a mixture of memoryview and device-buffer-like
-        objects.
-
-        Returns
-        -------
-        Tuple[Dict, List]
-            The first element of the returned tuple is a dict containing any
-            serializable metadata required to reconstruct the object. The
-            second element is a list containing the device buffers and
-            memoryviews of the object.
-        """
-
-    @classmethod
-    def deserialize(
-        cls, header: dict, frames: List[Frame]
-    ) -> DeviceBufferLike:
-        """Generate an buffer from a serialized representation.
-
-        Parameters
-        ----------
-        header : dict
-            The metadata required to reconstruct the object.
-        frames : list
-            The device-buffer-like and memoryview buffers that the object
-            should contain.
-
-        Returns
-        -------
-        DeviceBufferLike
-            A new object that implements DeviceBufferLike.
-        """
-
-
-def as_device_buffer_like(obj: Any) -> DeviceBufferLike:
-    """
-    Factory function to wrap `obj` in a DeviceBufferLike object.
-
-    If `obj` isn't device-buffer-like already, a new buffer that implements
-    DeviceBufferLike and points to the memory of `obj` is created. If `obj`
-    represents host memory, it is copied to a new `rmm.DeviceBuffer` device
-    allocation. Otherwise, the data of `obj` is **not** copied, instead the
-    new buffer keeps a reference to `obj` in order to retain the lifetime
-    of `obj`.
-
-    Raises ValueError if the data of `obj` isn't C-contiguous.
-
-    Parameters
-    ----------
-    obj : buffer-like or array-like
-        An object that exposes either device or host memory through
-        `__array_interface__`, `__cuda_array_interface__`, or the
-        buffer protocol. If `obj` represents host memory, data will
-        be copied.
-
-    Return
-    ------
-    DeviceBufferLike
-        A device-buffer-like instance that represents the device memory
-        of `obj`.
-    """
-
-    if isinstance(obj, DeviceBufferLike):
-        return obj
-    return Buffer(obj)
-
-
-class Buffer(Serializable):
-    """
-    A Buffer represents device memory.
-
-    Usually Buffers will be created using `as_device_buffer_like(obj)`,
-    which will make sure that `obj` is device-buffer-like and not a `Buffer`
-    necessarily.
-
-    Parameters
-    ----------
-    data : int or buffer-like or array-like
-        An integer representing a pointer to device memory or a buffer-like
-        or array-like object. When not an integer, `size` and `owner` must
-        be None.
-    size : int, optional
-        Size of device memory in bytes. Must be specified if `data` is an
-        integer.
-    owner : object, optional
-        Python object to which the lifetime of the memory allocation is tied.
-        A reference to this object is kept in the returned Buffer.
-    """
-
-    _ptr: int
-    _size: int
-    _owner: object
-
-    def __init__(
-        self, data: Union[int, Any], *, size: int = None, owner: object = None
-    ):
-        if isinstance(data, int):
-            if size is None:
-                raise ValueError(
-                    "size must be specified when `data` is an integer"
-                )
-            if size < 0:
-                raise ValueError("size cannot be negative")
-            self._ptr = data
-            self._size = size
-            self._owner = owner
-        else:
-            if size is not None or owner is not None:
-                raise ValueError(
-                    "`size` and `owner` must be None when "
-                    "`data` is a buffer-like object"
-                )
-
-            # `data` is a buffer-like object
-            buf: Any = data
-            if isinstance(buf, rmm.DeviceBuffer):
-                self._ptr = buf.ptr
-                self._size = buf.size
-                self._owner = buf
-                return
-            iface = getattr(buf, "__cuda_array_interface__", None)
-            if iface:
-                ptr, size = get_ptr_and_size(iface)
-                self._ptr = ptr
-                self._size = size
-                self._owner = buf
-                return
-            ptr, size = get_ptr_and_size(np.asarray(buf).__array_interface__)
-            buf = rmm.DeviceBuffer(ptr=ptr, size=size)
-            self._ptr = buf.ptr
-            self._size = buf.size
-            self._owner = buf
-
-    def __getitem__(self, key: slice) -> Buffer:
-        if not isinstance(key, slice):
-            raise ValueError("index must be an slice")
-        start, stop, step = key.indices(self.size)
-        if step != 1:
-            raise ValueError("slice must be contiguous")
-        return self.__class__(
-            data=self.ptr + start, size=stop - start, owner=self.owner
-        )
-
-    @property
-    def size(self) -> int:
-        return self._size
-
-    @property
-    def nbytes(self) -> int:
-        return self._size
-
-    @property
-    def ptr(self) -> int:
-        return self._ptr
-
-    @property
-    def owner(self) -> Any:
-        return self._owner
-
-    @property
-    def __cuda_array_interface__(self) -> dict:
-        return {
-            "data": (self.ptr, False),
-            "shape": (self.size,),
-            "strides": None,
-            "typestr": "|u1",
-            "version": 0,
-        }
-
-    def memoryview(self) -> memoryview:
-        host_buf = bytearray(self.size)
-        rmm._lib.device_buffer.copy_ptr_to_host(self.ptr, host_buf)
-        return memoryview(host_buf).toreadonly()
-
-    def serialize(self) -> Tuple[dict, list]:
-        header = {}  # type: Dict[Any, Any]
-        header["type-serialized"] = pickle.dumps(type(self))
-        header["constructor-kwargs"] = {}
-        header["desc"] = self.__cuda_array_interface__.copy()
-        header["desc"]["strides"] = (1,)
-        header["frame_count"] = 1
-        frames = [self]
-        return header, frames
-
-    @classmethod
-    def deserialize(cls, header: dict, frames: list) -> Buffer:
-        assert (
-            header["frame_count"] == 1
-        ), "Only expecting to deserialize Buffer with a single frame."
-        buf = cls(frames[0], **header["constructor-kwargs"])
-
-        if header["desc"]["shape"] != buf.__cuda_array_interface__["shape"]:
-            raise ValueError(
-                f"Received a `Buffer` with the wrong size."
-                f" Expected {header['desc']['shape']}, "
-                f"but got {buf.__cuda_array_interface__['shape']}"
-            )
-
-        return buf
-
-    def __repr__(self) -> str:
-        return (
-            f"<cudf.core.buffer.Buffer size={format_bytes(self._size)} "
-            f"ptr={hex(self._ptr)} owner={repr(self._owner)} "
-        )
-
-
-def is_c_contiguous(
-    shape: Sequence[int], strides: Sequence[int], itemsize: int
-) -> bool:
-    """
-    Determine if shape and strides are C-contiguous
-
-    Parameters
-    ----------
-    shape : Sequence[int]
-        Number of elements in each dimension.
-    strides : Sequence[int]
-        The stride of each dimension in bytes.
-    itemsize : int
-        Size of an element in bytes.
-
-    Return
-    ------
-    bool
-        The boolean answer.
-    """
-
-    if any(dim == 0 for dim in shape):
-        return True
-    cumulative_stride = itemsize
-    for dim, stride in zip(reversed(shape), reversed(strides)):
-        if dim > 1 and stride != cumulative_stride:
-            return False
-        cumulative_stride *= dim
-    return True
-
-
-def get_ptr_and_size(array_interface: Mapping) -> Tuple[int, int]:
-    """
-    Retrieve the pointer and size from an array interface.
-
-    Raises ValueError if array isn't C-contiguous.
-
-    Parameters
-    ----------
-    array_interface : Mapping
-        The array interface metadata.
-
-    Return
-    ------
-    pointer : int
-        The pointer to device or host memory
-    size : int
-        The size in bytes
-    """
-
-    shape = array_interface["shape"] or (1,)
-    strides = array_interface["strides"]
-    itemsize = cudf.dtype(array_interface["typestr"]).itemsize
-    if strides is None or is_c_contiguous(shape, strides, itemsize):
-        nelem = math.prod(shape)
-        ptr = array_interface["data"][0] or 0
-        return ptr, nelem * itemsize
-    raise ValueError("Buffer data must be C-contiguous")
diff --git a/python/cudf/cudf/core/buffer/__init__.py b/python/cudf/cudf/core/buffer/__init__.py
new file mode 100644
index 00000000000..a73bc69ffb5
--- /dev/null
+++ b/python/cudf/cudf/core/buffer/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (c) 2022, NVIDIA CORPORATION.
+
+from cudf.core.buffer.buffer import Buffer, cuda_array_interface_wrapper
+from cudf.core.buffer.utils import as_buffer
diff --git a/python/cudf/cudf/core/buffer/buffer.py b/python/cudf/cudf/core/buffer/buffer.py
new file mode 100644
index 00000000000..73e589ebb8e
--- /dev/null
+++ b/python/cudf/cudf/core/buffer/buffer.py
@@ -0,0 +1,319 @@
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+
+from __future__ import annotations
+
+import math
+import pickle
+from types import SimpleNamespace
+from typing import Any, Dict, Mapping, Sequence, Tuple, Type, TypeVar
+
+import numpy
+
+import rmm
+
+import cudf
+from cudf.core.abc import Serializable
+from cudf.utils.string import format_bytes
+
+T = TypeVar("T", bound="Buffer")
+
+
+def cuda_array_interface_wrapper(
+    ptr: int,
+    size: int,
+    owner: object = None,
+    readonly=False,
+    typestr="|u1",
+    version=0,
+):
+    """Wrap device pointer in an object that exposes `__cuda_array_interface__`
+
+    See <https://numba.readthedocs.io/en/stable/cuda/cuda_array_interface.html>
+
+    Parameters
+    ----------
+    ptr : int
+        An integer representing a pointer to device memory.
+    size : int
+        Size of device memory in bytes.
+    owner : object, optional
+        Python object to which the lifetime of the memory allocation is tied.
+        A reference to this object is kept in the returned wrapper object.
+    readonly: bool, optional
+        Mark the interface read-only.
+    typestr: str, optional
+        The type string of the interface. By default this is "|u1", which
+        means "an unsigned integer with a not relevant byteorder". See:
+        <https://numpy.org/doc/stable/reference/arrays.interface.html>
+    version : int, optional
+        The version of the interface.
+
+    Returns
+    -------
+    SimpleNamespace
+        An object that exposes `__cuda_array_interface__` and keeps a reference
+        to `owner`.
+    """
+
+    if size < 0:
+        raise ValueError("size cannot be negative")
+
+    return SimpleNamespace(
+        __cuda_array_interface__={
+            "data": (ptr, readonly),
+            "shape": (size,),
+            "strides": None,
+            "typestr": typestr,
+            "version": version,
+        },
+        owner=owner,
+    )
+
+
+class Buffer(Serializable):
+    """A Buffer represents device memory.
+
+    Use the factory function `as_buffer` to create a Buffer instance.
+    """
+
+    _ptr: int
+    _size: int
+    _owner: object
+
+    def __init__(self):
+        raise ValueError(
+            f"do not create a {self.__class__} directly, please "
+            "use the factory function `cudf.core.buffer.as_buffer`"
+        )
+
+    @classmethod
+    def _from_device_memory(cls: Type[T], data: Any) -> T:
+        """Create a Buffer from an object exposing `__cuda_array_interface__`.
+
+        No data is being copied.
+
+        Parameters
+        ----------
+        data : device-buffer-like
+            An object implementing the CUDA Array Interface.
+
+        Returns
+        -------
+        Buffer
+            Buffer representing the same device memory as `data`
+        """
+
+        # Bypass `__init__` and initialize attributes manually
+        ret = cls.__new__(cls)
+        ret._owner = data
+        if isinstance(data, rmm.DeviceBuffer):  # Common case shortcut
+            ret._ptr = data.ptr
+            ret._size = data.size
+        else:
+            ret._ptr, ret._size = get_ptr_and_size(
+                data.__cuda_array_interface__
+            )
+        if ret.size < 0:
+            raise ValueError("size cannot be negative")
+        return ret
+
+    @classmethod
+    def _from_host_memory(cls: Type[T], data: Any) -> T:
+        """Create a Buffer from a buffer or array like object
+
+        Data must implement `__array_interface__`, the buffer protocol, and/or
+        be convertible to a buffer object using `numpy.array()`
+
+        The host memory is copied to a new device allocation.
+
+        Raises ValueError if array isn't C-contiguous.
+
+        Parameters
+        ----------
+        data : Any
+            An object that represents host memory.
+
+        Returns
+        -------
+        Buffer
+            Buffer representing a copy of `data`.
+        """
+
+        # Convert to numpy array, this will not copy data in most cases.
+        ary = numpy.array(data, copy=False, subok=True)
+        # Extract pointer and size
+        ptr, size = get_ptr_and_size(ary.__array_interface__)
+        # Copy to device memory
+        buf = rmm.DeviceBuffer(ptr=ptr, size=size)
+        # Create from device memory
+        return cls._from_device_memory(buf)
+
+    def _getitem(self, offset: int, size: int) -> Buffer:
+        """
+        Sub-classes can override this to implement __getitem__
+        without having to handle non-slice inputs.
+        """
+        return self._from_device_memory(
+            cuda_array_interface_wrapper(
+                ptr=self.ptr + offset, size=size, owner=self.owner
+            )
+        )
+
+    def __getitem__(self, key: slice) -> Buffer:
+        """Create a new slice of the buffer."""
+        if not isinstance(key, slice):
+            raise TypeError(
+                "Argument 'key' has incorrect type "
+                f"(expected slice, got {key.__class__.__name__})"
+            )
+        start, stop, step = key.indices(self.size)
+        if step != 1:
+            raise ValueError("slice must be C-contiguous")
+        return self._getitem(offset=start, size=stop - start)
+
+    @property
+    def size(self) -> int:
+        """Size of the buffer in bytes."""
+        return self._size
+
+    @property
+    def nbytes(self) -> int:
+        """Size of the buffer in bytes."""
+        return self._size
+
+    @property
+    def ptr(self) -> int:
+        """Device pointer to the start of the buffer."""
+        return self._ptr
+
+    @property
+    def owner(self) -> Any:
+        """Object owning the memory of the buffer."""
+        return self._owner
+
+    @property
+    def __cuda_array_interface__(self) -> Mapping:
+        """Implementation of the CUDA Array Interface."""
+        return {
+            "data": (self.ptr, False),
+            "shape": (self.size,),
+            "strides": None,
+            "typestr": "|u1",
+            "version": 0,
+        }
+
+    def memoryview(self) -> memoryview:
+        """Read-only access to the buffer through host memory."""
+        host_buf = bytearray(self.size)
+        rmm._lib.device_buffer.copy_ptr_to_host(self.ptr, host_buf)
+        return memoryview(host_buf).toreadonly()
+
+    def serialize(self) -> Tuple[dict, list]:
+        """Serialize the buffer into header and frames.
+
+        The frames can be a mixture of memoryview and Buffer objects.
+
+        Returns
+        -------
+        Tuple[dict, List]
+            The first element of the returned tuple is a dict containing any
+            serializable metadata required to reconstruct the object. The
+            second element is a list containing Buffers and memoryviews.
+        """
+        header: Dict[str, Any] = {}
+        header["type-serialized"] = pickle.dumps(type(self))
+        header["frame_count"] = 1
+        frames = [self]
+        return header, frames
+
+    @classmethod
+    def deserialize(cls: Type[T], header: dict, frames: list) -> T:
+        """Create a Buffer from a serialized representation.
+
+        Parameters
+        ----------
+        header : dict
+            The metadata required to reconstruct the object.
+        frames : list
+            The Buffer or memoryview that makes up the Buffer.
+
+        Returns
+        -------
+        Buffer
+            The deserialized Buffer.
+        """
+        if header["frame_count"] != 1:
+            raise ValueError("Deserializing a Buffer expect a single frame")
+        frame = frames[0]
+        if isinstance(frame, cls):
+            return frame  # The frame is already deserialized
+
+        if hasattr(frame, "__cuda_array_interface__"):
+            return cls._from_device_memory(frame)
+        return cls._from_host_memory(frame)
+
+    def __repr__(self) -> str:
+        klass = self.__class__
+        name = f"{klass.__module__}.{klass.__qualname__}"
+        return (
+            f"<{name} size={format_bytes(self._size)} "
+            f"ptr={hex(self._ptr)} owner={repr(self._owner)}>"
+        )
+
+
+def is_c_contiguous(
+    shape: Sequence[int], strides: Sequence[int], itemsize: int
+) -> bool:
+    """Determine if shape and strides are C-contiguous
+
+    Parameters
+    ----------
+    shape : Sequence[int]
+        Number of elements in each dimension.
+    strides : Sequence[int]
+        The stride of each dimension in bytes.
+    itemsize : int
+        Size of an element in bytes.
+
+    Returns
+    -------
+    bool
+        The boolean answer.
+    """
+
+    if any(dim == 0 for dim in shape):
+        return True
+    cumulative_stride = itemsize
+    for dim, stride in zip(reversed(shape), reversed(strides)):
+        if dim > 1 and stride != cumulative_stride:
+            return False
+        cumulative_stride *= dim
+    return True
+
+
+def get_ptr_and_size(array_interface: Mapping) -> Tuple[int, int]:
+    """Retrieve the pointer and size from an array interface.
+
+    Raises ValueError if array isn't C-contiguous.
+
+    Parameters
+    ----------
+    array_interface : Mapping
+        The array interface metadata.
+
+    Returns
+    -------
+    pointer : int
+        The pointer to device or host memory
+    size : int
+        The size in bytes
+    """
+
+    shape = array_interface["shape"] or (1,)
+    strides = array_interface["strides"]
+    itemsize = cudf.dtype(array_interface["typestr"]).itemsize
+    if strides is None or is_c_contiguous(shape, strides, itemsize):
+        nelem = math.prod(shape)
+        ptr = array_interface["data"][0] or 0
+        return ptr, nelem * itemsize
+    raise ValueError("Buffer data must be C-contiguous")
diff --git a/python/cudf/cudf/core/buffer/utils.py b/python/cudf/cudf/core/buffer/utils.py
new file mode 100644
index 00000000000..5e017c4bc92
--- /dev/null
+++ b/python/cudf/cudf/core/buffer/utils.py
@@ -0,0 +1,67 @@
+# Copyright (c) 2022, NVIDIA CORPORATION.
+
+from __future__ import annotations
+
+from typing import Any, Union
+
+from cudf.core.buffer.buffer import Buffer, cuda_array_interface_wrapper
+
+
+def as_buffer(
+    data: Union[int, Any],
+    *,
+    size: int = None,
+    owner: object = None,
+) -> Buffer:
+    """Factory function to wrap `data` in a Buffer object.
+
+    If `data` isn't a buffer already, a new buffer that points to the memory of
+    `data` is created. If `data` represents host memory, it is copied to a new
+    `rmm.DeviceBuffer` device allocation. Otherwise, the memory of `data` is
+    **not** copied, instead the new buffer keeps a reference to `data` in order
+    to retain its lifetime.
+
+    If `data` is an integer, it is assumed to point to device memory.
+
+    Raises ValueError if data isn't C-contiguous.
+
+    Parameters
+    ----------
+    data : int or buffer-like or array-like
+        An integer representing a pointer to device memory or a buffer-like
+        or array-like object. When not an integer, `size` and `owner` must
+        be None.
+    size : int, optional
+        Size of device memory in bytes. Must be specified if `data` is an
+        integer.
+    owner : object, optional
+        Python object to which the lifetime of the memory allocation is tied.
+        A reference to this object is kept in the returned Buffer.
+
+    Returns
+    -------
+    Buffer
+        A buffer instance that represents the device memory of `data`.
+    """
+
+    if isinstance(data, Buffer):
+        return data
+
+    # We handle the integer argument in the factory function by wrapping
+    # the pointer in a `__cuda_array_interface__` exposing object so that
+    # the Buffer (and its sub-classes) do not have to.
+    if isinstance(data, int):
+        if size is None:
+            raise ValueError(
+                "size must be specified when `data` is an integer"
+            )
+        data = cuda_array_interface_wrapper(ptr=data, size=size, owner=owner)
+    elif size is not None or owner is not None:
+        raise ValueError(
+            "`size` and `owner` must be None when "
+            "`data` is a buffer-like or array-like object"
+        )
+
+    if hasattr(data, "__cuda_array_interface__"):
+        return Buffer._from_device_memory(data)
+    return Buffer._from_host_memory(data)
diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py
index af5d140a20a..322092a149c 100644
--- a/python/cudf/cudf/core/column/categorical.py
+++ b/python/cudf/cudf/core/column/categorical.py
@@ -16,7 +16,7 @@
 from cudf._lib.transform import bools_to_mask
 from cudf._typing import ColumnBinaryOperand, ColumnLike, Dtype, ScalarLike
 from cudf.api.types import is_categorical_dtype, is_interval_dtype
-from cudf.core.buffer import DeviceBufferLike
+from cudf.core.buffer import Buffer
 from cudf.core.column import column
 from cudf.core.column.methods import ColumnMethods
 from cudf.core.dtypes import CategoricalDtype
@@ -595,7 +595,7 @@ class CategoricalColumn(column.ColumnBase):
     Parameters
     ----------
     dtype : CategoricalDtype
-    mask : DeviceBufferLike
+    mask : Buffer
         The validity mask
     offset : int
         Data offset
@@ -619,7 +619,7 @@ class CategoricalColumn(column.ColumnBase):
     def __init__(
         self,
         dtype: CategoricalDtype,
-        mask: DeviceBufferLike = None,
+        mask: Buffer = None,
         size: int = None,
         offset: int = 0,
         null_count: int = None,
@@ -678,7 +678,7 @@ def _process_values_for_isin(
         rhs = cudf.core.column.as_column(values, dtype=self.dtype)
         return lhs, rhs
 
-    def set_base_mask(self, value: Optional[DeviceBufferLike]):
+    def set_base_mask(self, value: Optional[Buffer]):
         super().set_base_mask(value)
         self._codes = None
 
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 7291b695312..22f8d27f9e8 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -64,7 +64,7 @@
 )
 from cudf.core._compat import PANDAS_GE_150
 from cudf.core.abc import Serializable
-from cudf.core.buffer import Buffer, DeviceBufferLike, as_device_buffer_like
+from cudf.core.buffer import Buffer, as_buffer
 from cudf.core.dtypes import (
     CategoricalDtype,
     IntervalDtype,
@@ -357,7 +357,7 @@ def valid_count(self) -> int:
         return len(self) - self.null_count
 
     @property
-    def nullmask(self) -> DeviceBufferLike:
+    def nullmask(self) -> Buffer:
         """The gpu buffer for the null-mask"""
         if not self.nullable:
             raise ValueError("Column has no null mask")
@@ -761,12 +761,12 @@ def _obtain_isin_result(self, rhs: ColumnBase) -> ColumnBase:
         res = res.drop_duplicates(subset="orig_order", ignore_index=True)
         return res._data["bool"].fillna(False)
 
-    def as_mask(self) -> DeviceBufferLike:
+    def as_mask(self) -> Buffer:
         """Convert booleans to bitmask
 
         Returns
         -------
-        DeviceBufferLike
+        Buffer
         """
 
         if self.has_nulls():
@@ -1281,7 +1281,7 @@ def column_empty(
         data = None
         children = (
             build_column(
-                data=as_device_buffer_like(
+                data=as_buffer(
                     rmm.DeviceBuffer(
                         size=row_count * cudf.dtype("int32").itemsize
                     )
@@ -1294,7 +1294,7 @@ def column_empty(
         children = (
             full(row_count + 1, 0, dtype="int32"),
             build_column(
-                data=as_device_buffer_like(
+                data=as_buffer(
                     rmm.DeviceBuffer(
                         size=row_count * cudf.dtype("int8").itemsize
                     )
@@ -1303,9 +1303,7 @@ def column_empty(
             ),
         )
     else:
-        data = as_device_buffer_like(
-            rmm.DeviceBuffer(size=row_count * dtype.itemsize)
-        )
+        data = as_buffer(rmm.DeviceBuffer(size=row_count * dtype.itemsize))
 
     if masked:
         mask = create_null_mask(row_count, state=MaskState.ALL_NULL)
@@ -1318,11 +1316,11 @@ def column_empty(
 
 
 def build_column(
-    data: Union[DeviceBufferLike, None],
+    data: Union[Buffer, None],
     dtype: Dtype,
     *,
     size: int = None,
-    mask: DeviceBufferLike = None,
+    mask: Buffer = None,
     offset: int = 0,
     null_count: int = None,
     children: Tuple[ColumnBase, ...] = (),
@@ -1332,12 +1330,12 @@ def build_column(
 
     Parameters
     ----------
-    data : DeviceBufferLike
+    data : Buffer
         The data buffer (can be None if constructing certain Column
         types like StringColumn, ListColumn, or CategoricalColumn)
     dtype
         The dtype associated with the Column to construct
-    mask : DeviceBufferLike, optional
+    mask : Buffer, optional
         The mask buffer
     size : int, optional
     offset : int, optional
@@ -1482,7 +1480,7 @@ def build_column(
 def build_categorical_column(
     categories: ColumnBase,
     codes: ColumnBase,
-    mask: DeviceBufferLike = None,
+    mask: Buffer = None,
     size: int = None,
     offset: int = 0,
     null_count: int = None,
@@ -1498,7 +1496,7 @@ def build_categorical_column(
     codes : Column
         Column of codes, the size of the resulting Column will be
         the size of `codes`
-    mask : DeviceBufferLike
+    mask : Buffer
         Null mask
     size : int, optional
     offset : int, optional
@@ -1542,7 +1540,7 @@ def build_interval_column(
         Column of values representing the left of the interval
     right_col : Column
         Column of representing the right of the interval
-    mask : DeviceBufferLike
+    mask : Buffer
         Null mask
     size : int, optional
     offset : int, optional
@@ -1573,7 +1571,7 @@ def build_interval_column(
 def build_list_column(
     indices: ColumnBase,
     elements: ColumnBase,
-    mask: DeviceBufferLike = None,
+    mask: Buffer = None,
     size: int = None,
     offset: int = 0,
     null_count: int = None,
@@ -1587,7 +1585,7 @@ def build_list_column(
         Column of list indices
     elements : ColumnBase
         Column of list elements
-    mask: DeviceBufferLike
+    mask: Buffer
         Null mask
     size: int, optional
     offset: int, optional
@@ -1619,7 +1617,7 @@ def build_struct_column(
     names: Sequence[str],
     children: Tuple[ColumnBase, ...],
     dtype: Optional[Dtype] = None,
-    mask: DeviceBufferLike = None,
+    mask: Buffer = None,
     size: int = None,
     offset: int = 0,
     null_count: int = None,
@@ -1633,7 +1631,7 @@ def build_struct_column(
         Field names to map to children dtypes, must be strings.
     children : tuple
 
-    mask: DeviceBufferLike
+    mask: Buffer
         Null mask
     size: int, optional
     offset: int, optional
@@ -1669,9 +1667,7 @@ def _make_copy_replacing_NaT_with_null(column):
     out_col = cudf._lib.replace.replace(
         column,
         build_column(
-            as_device_buffer_like(
-                np.array([na_value], dtype=column.dtype).view("|u1")
-            ),
+            as_buffer(np.array([na_value], dtype=column.dtype).view("|u1")),
             dtype=column.dtype,
         ),
         null,
@@ -1766,7 +1762,7 @@ def as_column(
         ):
             arbitrary = cupy.ascontiguousarray(arbitrary)
 
-        data = as_device_buffer_like(arbitrary)
+        data = as_buffer(arbitrary)
         col = build_column(data, dtype=current_dtype, mask=mask)
 
         if dtype is not None:
@@ -1914,7 +1910,7 @@ def as_column(
             if cast_dtype:
                 arbitrary = arbitrary.astype(cudf.dtype("datetime64[s]"))
 
-            buffer = as_device_buffer_like(arbitrary.view("|u1"))
+            buffer = as_buffer(arbitrary.view("|u1"))
             mask = None
             if nan_as_null is None or nan_as_null is True:
                 data = build_column(buffer, dtype=arbitrary.dtype)
@@ -1932,7 +1928,7 @@ def as_column(
             if cast_dtype:
                 arbitrary = arbitrary.astype(cudf.dtype("timedelta64[s]"))
 
-            buffer = as_device_buffer_like(arbitrary.view("|u1"))
+            buffer = as_buffer(arbitrary.view("|u1"))
             mask = None
             if nan_as_null is None or nan_as_null is True:
                 data = build_column(buffer, dtype=arbitrary.dtype)
@@ -2211,7 +2207,7 @@ def _construct_array(
     return arbitrary
 
 
-def _mask_from_cuda_array_interface_desc(obj) -> Union[DeviceBufferLike, None]:
+def _mask_from_cuda_array_interface_desc(obj) -> Union[Buffer, None]:
     desc = obj.__cuda_array_interface__
     mask = desc.get("mask", None)
 
@@ -2223,7 +2219,7 @@ def _mask_from_cuda_array_interface_desc(obj) -> Union[DeviceBufferLike, None]:
         typecode = typestr[1]
         if typecode == "t":
             mask_size = bitmask_allocation_size_bytes(nelem)
-            mask = Buffer(data=ptr, size=mask_size, owner=obj)
+            mask = as_buffer(data=ptr, size=mask_size, owner=obj)
         elif typecode == "b":
             col = as_column(mask)
             mask = bools_to_mask(col)
diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
index 1419b14e8c6..56436ac141d 100644
--- a/python/cudf/cudf/core/column/datetime.py
+++ b/python/cudf/cudf/core/column/datetime.py
@@ -6,7 +6,6 @@
 import locale
 import re
 from locale import nl_langinfo
-from types import SimpleNamespace
 from typing import Any, Mapping, Sequence, cast
 
 import numpy as np
@@ -23,7 +22,7 @@
 )
 from cudf.api.types import is_datetime64_dtype, is_scalar, is_timedelta64_dtype
 from cudf.core._compat import PANDAS_GE_120
-from cudf.core.buffer import DeviceBufferLike
+from cudf.core.buffer import Buffer, cuda_array_interface_wrapper
 from cudf.core.column import ColumnBase, as_column, column, string
 from cudf.core.column.timedelta import _unit_to_nanoseconds_conversion
 from cudf.utils.utils import _fillna_natwise
@@ -98,11 +97,11 @@ class DatetimeColumn(column.ColumnBase):
 
     Parameters
     ----------
-    data : DeviceBufferLike
+    data : Buffer
         The datetime values
     dtype : np.dtype
         The data type
-    mask : DeviceBufferLike; optional
+    mask : Buffer; optional
         The validity mask
     """
 
@@ -121,9 +120,9 @@ class DatetimeColumn(column.ColumnBase):
 
     def __init__(
         self,
-        data: DeviceBufferLike,
+        data: Buffer,
         dtype: DtypeObj,
-        mask: DeviceBufferLike = None,
+        mask: Buffer = None,
         size: int = None,  # TODO: make non-optional
         offset: int = 0,
         null_count: int = None,
@@ -131,9 +130,7 @@ def __init__(
         dtype = cudf.dtype(dtype)
 
         if data.size % dtype.itemsize:
-            raise ValueError(
-                "DeviceBufferLike size must be divisible by element size"
-            )
+            raise ValueError("Buffer size must be divisible by element size")
         if size is None:
             size = data.size // dtype.itemsize
             size = size - offset
@@ -291,20 +288,16 @@ def __cuda_array_interface__(self) -> Mapping[str, Any]:
         }
 
         if self.nullable and self.has_nulls():
-
             # Create a simple Python object that exposes the
             # `__cuda_array_interface__` attribute here since we need to modify
             # some of the attributes from the numba device array
-            mask = SimpleNamespace(
-                __cuda_array_interface__={
-                    "shape": (len(self),),
-                    "typestr": "<t1",
-                    "data": (self.mask_ptr, True),
-                    "version": 1,
-                }
+            output["mask"] = cuda_array_interface_wrapper(
+                ptr=self.mask_ptr,
+                size=len(self),
+                owner=self.mask,
+                readonly=True,
+                typestr="<t1",
             )
-            output["mask"] = mask
-
         return output
 
     def as_datetime_column(self, dtype: Dtype, **kwargs) -> DatetimeColumn:
diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py
index e03802e6d8c..0beb07bb591 100644
--- a/python/cudf/cudf/core/column/decimal.py
+++ b/python/cudf/cudf/core/column/decimal.py
@@ -16,7 +16,7 @@
 )
 from cudf._typing import ColumnBinaryOperand, Dtype
 from cudf.api.types import is_integer_dtype, is_scalar
-from cudf.core.buffer import as_device_buffer_like
+from cudf.core.buffer import as_buffer
 from cudf.core.column import ColumnBase, as_column
 from cudf.core.dtypes import (
     Decimal32Dtype,
@@ -203,7 +203,7 @@ def from_arrow(cls, data: pa.Array):
         data_128 = cp.array(np.frombuffer(data.buffers()[1]).view("int32"))
         data_32 = data_128[::4].copy()
         return cls(
-            data=as_device_buffer_like(data_32.view("uint8")),
+            data=as_buffer(data_32.view("uint8")),
             size=len(data),
             dtype=dtype,
             offset=data.offset,
@@ -290,7 +290,7 @@ def from_arrow(cls, data: pa.Array):
         data_128 = cp.array(np.frombuffer(data.buffers()[1]).view("int64"))
         data_64 = data_128[::2].copy()
         return cls(
-            data=as_device_buffer_like(data_64.view("uint8")),
+            data=as_buffer(data_64.view("uint8")),
             size=len(data),
             dtype=dtype,
             offset=data.offset,
diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py
index a66c11c8bdc..f126f47c3c2 100644
--- a/python/cudf/cudf/core/column/numerical.py
+++ b/python/cudf/cudf/core/column/numerical.py
@@ -2,7 +2,6 @@
 
 from __future__ import annotations
 
-from types import SimpleNamespace
 from typing import (
     Any,
     Callable,
@@ -36,7 +35,7 @@
     is_number,
     is_scalar,
 )
-from cudf.core.buffer import DeviceBufferLike, as_device_buffer_like
+from cudf.core.buffer import Buffer, as_buffer, cuda_array_interface_wrapper
 from cudf.core.column import (
     ColumnBase,
     as_column,
@@ -66,10 +65,10 @@ class NumericalColumn(NumericalBaseColumn):
 
     Parameters
     ----------
-    data : DeviceBufferLike
+    data : Buffer
     dtype : np.dtype
-        The dtype associated with the data DeviceBufferLike
-    mask : DeviceBufferLike, optional
+        The dtype associated with the data Buffer
+    mask : Buffer, optional
     """
 
     _nan_count: Optional[int]
@@ -77,9 +76,9 @@ class NumericalColumn(NumericalBaseColumn):
 
     def __init__(
         self,
-        data: DeviceBufferLike,
+        data: Buffer,
         dtype: DtypeObj,
-        mask: DeviceBufferLike = None,
+        mask: Buffer = None,
         size: int = None,  # TODO: make this non-optional
         offset: int = 0,
         null_count: int = None,
@@ -87,9 +86,7 @@ def __init__(
         dtype = cudf.dtype(dtype)
 
         if data.size % dtype.itemsize:
-            raise ValueError(
-                "DeviceBufferLike size must be divisible by element size"
-            )
+            raise ValueError("Buffer size must be divisible by element size")
         if size is None:
             size = (data.size // dtype.itemsize) - offset
         self._nan_count = None
@@ -177,19 +174,16 @@ def __cuda_array_interface__(self) -> Mapping[str, Any]:
         }
 
         if self.nullable and self.has_nulls():
-
             # Create a simple Python object that exposes the
             # `__cuda_array_interface__` attribute here since we need to modify
             # some of the attributes from the numba device array
-            mask = SimpleNamespace(
-                __cuda_array_interface__={
-                    "shape": (len(self),),
-                    "typestr": "<t1",
-                    "data": (self.mask_ptr, True),
-                    "version": 1,
-                }
+            output["mask"] = cuda_array_interface_wrapper(
+                ptr=self.mask_ptr,
+                size=len(self),
+                owner=self.mask,
+                readonly=True,
+                typestr="<t1",
             )
-            output["mask"] = mask
 
         return output
 
@@ -306,7 +300,7 @@ def normalize_binop_value(
             else:
                 ary = full(len(self), other, dtype=other_dtype)
                 return column.build_column(
-                    data=as_device_buffer_like(ary),
+                    data=as_buffer(ary),
                     dtype=ary.dtype,
                     mask=self.mask,
                 )
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index 625a9c70873..f58180ff5dd 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -33,7 +33,7 @@
     is_scalar,
     is_string_dtype,
 )
-from cudf.core.buffer import DeviceBufferLike
+from cudf.core.buffer import Buffer
 from cudf.core.column import column, datetime
 from cudf.core.column.column import ColumnBase
 from cudf.core.column.methods import ColumnMethods
@@ -5173,7 +5173,7 @@ class StringColumn(column.ColumnBase):
 
     Parameters
     ----------
-    mask : DeviceBufferLike
+    mask : Buffer
         The validity mask
     offset : int
         Data offset
@@ -5207,7 +5207,7 @@ class StringColumn(column.ColumnBase):
 
     def __init__(
         self,
-        mask: DeviceBufferLike = None,
+        mask: Buffer = None,
         size: int = None,  # TODO: make non-optional
         offset: int = 0,
         null_count: int = None,
diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py
index e6d688014fa..3dc923e7ded 100644
--- a/python/cudf/cudf/core/column/timedelta.py
+++ b/python/cudf/cudf/core/column/timedelta.py
@@ -13,7 +13,7 @@
 from cudf import _lib as libcudf
 from cudf._typing import ColumnBinaryOperand, DatetimeLikeScalar, Dtype
 from cudf.api.types import is_scalar, is_timedelta64_dtype
-from cudf.core.buffer import DeviceBufferLike
+from cudf.core.buffer import Buffer
 from cudf.core.column import ColumnBase, column, string
 from cudf.utils.dtypes import np_to_pa_dtype
 from cudf.utils.utils import _fillna_natwise
@@ -40,13 +40,13 @@ class TimeDeltaColumn(ColumnBase):
     """
     Parameters
     ----------
-    data : DeviceBufferLike
+    data : Buffer
         The Timedelta values
     dtype : np.dtype
         The data type
     size : int
         Size of memory allocation.
-    mask : DeviceBufferLike; optional
+    mask : Buffer; optional
         The validity mask
     offset : int
         Data offset
@@ -78,19 +78,17 @@ class TimeDeltaColumn(ColumnBase):
 
     def __init__(
         self,
-        data: DeviceBufferLike,
+        data: Buffer,
         dtype: Dtype,
         size: int = None,  # TODO: make non-optional
-        mask: DeviceBufferLike = None,
+        mask: Buffer = None,
         offset: int = 0,
         null_count: int = None,
     ):
         dtype = cudf.dtype(dtype)
 
         if data.size % dtype.itemsize:
-            raise ValueError(
-                "DeviceBufferLike size must be divisible by element size"
-            )
+            raise ValueError("Buffer size must be divisible by element size")
         if size is None:
             size = data.size // dtype.itemsize
             size = size - offset
diff --git a/python/cudf/cudf/core/df_protocol.py b/python/cudf/cudf/core/df_protocol.py
index d770f4f6130..b29fc41e5b4 100644
--- a/python/cudf/cudf/core/df_protocol.py
+++ b/python/cudf/cudf/core/df_protocol.py
@@ -18,7 +18,7 @@
 from numba.cuda import as_cuda_array
 
 import cudf
-from cudf.core.buffer import Buffer, DeviceBufferLike
+from cudf.core.buffer import Buffer, as_buffer
 from cudf.core.column import as_column, build_categorical_column, build_column
 
 # Implementation of interchange protocol classes
@@ -64,12 +64,12 @@ class _CuDFBuffer:
 
     def __init__(
         self,
-        buf: DeviceBufferLike,
+        buf: Buffer,
         dtype: np.dtype,
         allow_copy: bool = True,
     ) -> None:
         """
-        Use DeviceBufferLike object.
+        Use Buffer object.
         """
         # Store the cudf buffer where the data resides as a private
         # attribute, so we can use it to retrieve the public attributes
@@ -80,7 +80,7 @@ def __init__(
     @property
     def bufsize(self) -> int:
         """
-        The DeviceBufferLike size in bytes.
+        The Buffer size in bytes.
         """
         return self._buf.size
 
@@ -627,7 +627,7 @@ def __dataframe__(
 Notes
 -----
 
-- Interpreting a raw pointer (as in ``DeviceBufferLike.ptr``) is annoying and
+- Interpreting a raw pointer (as in ``Buffer.ptr``) is annoying and
   unsafe to do in pure Python. It's more general but definitely less friendly
   than having ``to_arrow`` and ``to_numpy`` methods. So for the buffers which
   lack ``__dlpack__`` (e.g., because the column dtype isn't supported by
@@ -721,7 +721,7 @@ def _protocol_to_cudf_column_numeric(
     _dbuffer, _ddtype = buffers["data"]
     _check_buffer_is_on_gpu(_dbuffer)
     cudfcol_num = build_column(
-        Buffer(data=_dbuffer.ptr, size=_dbuffer.bufsize, owner=None),
+        as_buffer(data=_dbuffer.ptr, size=_dbuffer.bufsize, owner=None),
         protocol_dtype_to_cupy_dtype(_ddtype),
     )
     return _set_missing_values(col, cudfcol_num), buffers
@@ -751,9 +751,7 @@ def _set_missing_values(
     valid_mask = protocol_col.get_buffers()["validity"]
     if valid_mask is not None:
         bitmask = cp.asarray(
-            Buffer(
-                data=valid_mask[0].ptr, size=valid_mask[0].bufsize, owner=None
-            ),
+            as_buffer(data=valid_mask[0].ptr, size=valid_mask[0].bufsize),
             cp.bool8,
         )
         cudf_col[~bitmask] = None
@@ -792,7 +790,7 @@ def _protocol_to_cudf_column_categorical(
     _check_buffer_is_on_gpu(codes_buffer)
     cdtype = protocol_dtype_to_cupy_dtype(codes_dtype)
     codes = build_column(
-        Buffer(data=codes_buffer.ptr, size=codes_buffer.bufsize, owner=None),
+        as_buffer(data=codes_buffer.ptr, size=codes_buffer.bufsize),
         cdtype,
     )
 
@@ -824,7 +822,7 @@ def _protocol_to_cudf_column_string(
     data_buffer, data_dtype = buffers["data"]
     _check_buffer_is_on_gpu(data_buffer)
     encoded_string = build_column(
-        Buffer(data=data_buffer.ptr, size=data_buffer.bufsize, owner=None),
+        as_buffer(data=data_buffer.ptr, size=data_buffer.bufsize),
         protocol_dtype_to_cupy_dtype(data_dtype),
     )
 
@@ -834,7 +832,7 @@ def _protocol_to_cudf_column_string(
     offset_buffer, offset_dtype = buffers["offsets"]
     _check_buffer_is_on_gpu(offset_buffer)
     offsets = build_column(
-        Buffer(data=offset_buffer.ptr, size=offset_buffer.bufsize, owner=None),
+        as_buffer(data=offset_buffer.ptr, size=offset_buffer.bufsize),
         protocol_dtype_to_cupy_dtype(offset_dtype),
     )
 
diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py
index 84f528549e9..25b1b3895de 100644
--- a/python/cudf/cudf/core/dtypes.py
+++ b/python/cudf/cudf/core/dtypes.py
@@ -20,7 +20,7 @@
 from cudf._typing import Dtype
 from cudf.core._compat import PANDAS_GE_130, PANDAS_GE_150
 from cudf.core.abc import Serializable
-from cudf.core.buffer import DeviceBufferLike
+from cudf.core.buffer import Buffer
 from cudf.utils.docutils import doc_apply
 
 if PANDAS_GE_150:
@@ -592,7 +592,7 @@ def serialize(self) -> Tuple[dict, list]:
         header: Dict[str, Any] = {}
         header["type-serialized"] = pickle.dumps(type(self))
 
-        frames: List[DeviceBufferLike] = []
+        frames: List[Buffer] = []
 
         fields: Dict[str, Union[bytes, Tuple[Any, Tuple[int, int]]]] = {}
 
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index fbaa95763a1..61971e3c749 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -2974,7 +2974,7 @@ def as_index(arbitrary, nan_as_null=None, **kwargs) -> BaseIndex:
     Currently supported inputs are:
 
     * ``Column``
-    * ``DeviceBufferLike``
+    * ``Buffer``
     * ``Series``
     * ``Index``
     * numba device array
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index f9600c84f5e..70e8c3d6860 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -1831,8 +1831,6 @@ def data(self):
         2    3
         3    4
         dtype: int64
-        >>> series.data
-        <cudf.core.buffer.Buffer ...>
         >>> np.array(series.data.memoryview())
         array([1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0,
                0, 0, 4, 0, 0, 0, 0, 0, 0, 0], dtype=uint8)
diff --git a/python/cudf/cudf/tests/test_buffer.py b/python/cudf/cudf/tests/test_buffer.py
index eaa615a2839..5ed5750f29b 100644
--- a/python/cudf/cudf/tests/test_buffer.py
+++ b/python/cudf/cudf/tests/test_buffer.py
@@ -1,10 +1,9 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
-from typing import Callable
 
 import cupy as cp
 import pytest
 
-from cudf.core.buffer import Buffer, DeviceBufferLike, as_device_buffer_like
+from cudf.core.buffer import Buffer, as_buffer
 
 arr_len = 10
 
@@ -23,10 +22,10 @@
 def test_buffer_from_cuda_iface_contiguous(data):
     data, expect_success = data
     if expect_success:
-        as_device_buffer_like(data.view("|u1"))
+        as_buffer(data.view("|u1"))
     else:
         with pytest.raises(ValueError):
-            as_device_buffer_like(data.view("|u1"))
+            as_buffer(data.view("|u1"))
 
 
 @pytest.mark.parametrize(
@@ -41,24 +40,23 @@ def test_buffer_from_cuda_iface_contiguous(data):
 @pytest.mark.parametrize("dtype", ["uint8", "int8", "float32", "int32"])
 def test_buffer_from_cuda_iface_dtype(data, dtype):
     data = data.astype(dtype)
-    buf = as_device_buffer_like(data)
+    buf = as_buffer(data)
     got = cp.array(buf).reshape(-1).view("uint8")
     expect = data.reshape(-1).view("uint8")
     assert (expect == got).all()
 
 
-@pytest.mark.parametrize("creator", [Buffer, as_device_buffer_like])
-def test_buffer_creation_from_any(creator: Callable[[object], Buffer]):
+def test_buffer_creation_from_any():
     ary = cp.arange(arr_len)
-    b = creator(ary)
-    assert isinstance(b, DeviceBufferLike)
+    b = as_buffer(ary)
+    assert isinstance(b, Buffer)
     assert ary.__cuda_array_interface__["data"][0] == b.ptr
     assert ary.nbytes == b.size
 
     with pytest.raises(
         ValueError, match="size must be specified when `data` is an integer"
     ):
-        Buffer(42)
+        as_buffer(42)
 
 
 @pytest.mark.parametrize(
@@ -66,7 +64,7 @@ def test_buffer_creation_from_any(creator: Callable[[object], Buffer]):
 )
 def test_buffer_repr(size, expect):
     ary = cp.arange(size, dtype="uint8")
-    buf = as_device_buffer_like(ary)
+    buf = as_buffer(ary)
     assert f"size={expect}" in repr(buf)
 
 
@@ -83,25 +81,25 @@ def test_buffer_repr(size, expect):
 )
 def test_buffer_slice(idx):
     ary = cp.arange(arr_len, dtype="uint8")
-    buf = as_device_buffer_like(ary)
+    buf = as_buffer(ary)
     expect = ary[idx]
     got = cp.array(buf[idx])
     assert (expect == got).all()
 
 
 @pytest.mark.parametrize(
-    "idx, err_msg",
+    "idx, err_type, err_msg",
     [
-        (1, "index must be an slice"),
-        (slice(3, 2), "size cannot be negative"),
-        (slice(1, 2, 2), "slice must be contiguous"),
-        (slice(1, 2, -1), "slice must be contiguous"),
-        (slice(3, 2, -1), "slice must be contiguous"),
+        (1, TypeError, "Argument 'key' has incorrect type"),
+        (slice(3, 2), ValueError, "size cannot be negative"),
+        (slice(1, 2, 2), ValueError, "slice must be C-contiguous"),
+        (slice(1, 2, -1), ValueError, "slice must be C-contiguous"),
+        (slice(3, 2, -1), ValueError, "slice must be C-contiguous"),
     ],
 )
-def test_buffer_slice_fail(idx, err_msg):
+def test_buffer_slice_fail(idx, err_type, err_msg):
     ary = cp.arange(arr_len, dtype="uint8")
-    buf = as_device_buffer_like(ary)
+    buf = as_buffer(ary)
 
-    with pytest.raises(ValueError, match=err_msg):
+    with pytest.raises(err_type, match=err_msg):
         buf[idx]
diff --git a/python/cudf/cudf/tests/test_column.py b/python/cudf/cudf/tests/test_column.py
index 4e2a26d31bd..467c88b200f 100644
--- a/python/cudf/cudf/tests/test_column.py
+++ b/python/cudf/cudf/tests/test_column.py
@@ -406,7 +406,7 @@ def test_column_view_string_slice(slc):
 )
 def test_as_column_buffer(data, expected):
     actual_column = cudf.core.column.as_column(
-        cudf.core.buffer.as_device_buffer_like(data), dtype=data.dtype
+        cudf.core.buffer.as_buffer(data), dtype=data.dtype
     )
     assert_eq(cudf.Series(actual_column), cudf.Series(expected))
 
diff --git a/python/cudf/cudf/tests/test_cuda_array_interface.py b/python/cudf/cudf/tests/test_cuda_array_interface.py
index 2a62a289747..9b9709b52c3 100644
--- a/python/cudf/cudf/tests/test_cuda_array_interface.py
+++ b/python/cudf/cudf/tests/test_cuda_array_interface.py
@@ -179,9 +179,7 @@ def test_cuda_array_interface_pytorch():
     got = cudf.Series(tensor)
 
     assert_eq(got, series)
-    buffer = cudf.core.buffer.as_device_buffer_like(
-        cupy.ones(10, dtype=np.bool_)
-    )
+    buffer = cudf.core.buffer.as_buffer(cupy.ones(10, dtype=np.bool_))
     tensor = torch.tensor(buffer)
     got = cudf.Series(tensor, dtype=np.bool_)
 
diff --git a/python/cudf/cudf/tests/test_df_protocol.py b/python/cudf/cudf/tests/test_df_protocol.py
index 7b83eec9b63..6f8305e6751 100644
--- a/python/cudf/cudf/tests/test_df_protocol.py
+++ b/python/cudf/cudf/tests/test_df_protocol.py
@@ -7,7 +7,7 @@
 import pytest
 
 import cudf
-from cudf.core.buffer import Buffer
+from cudf.core.buffer import as_buffer
 from cudf.core.column import build_column
 from cudf.core.df_protocol import (
     DataFrameObject,
@@ -25,7 +25,7 @@ def assert_buffer_equal(buffer_and_dtype: Tuple[_CuDFBuffer, Any], cudfcol):
     device_id = cp.asarray(cudfcol.data).device.id
     assert buf.__dlpack_device__() == (2, device_id)
     col_from_buf = build_column(
-        Buffer(data=buf.ptr, size=buf.bufsize, owner=None),
+        as_buffer(data=buf.ptr, size=buf.bufsize),
         protocol_dtype_to_cupy_dtype(dtype),
     )
     # check that non null values are the equals as nulls are represented
diff --git a/python/cudf/cudf/tests/test_pickling.py b/python/cudf/cudf/tests/test_pickling.py
index 1427a214a72..21343f19d79 100644
--- a/python/cudf/cudf/tests/test_pickling.py
+++ b/python/cudf/cudf/tests/test_pickling.py
@@ -7,7 +7,7 @@
 import pytest
 
 from cudf import DataFrame, GenericIndex, RangeIndex, Series
-from cudf.core.buffer import as_device_buffer_like
+from cudf.core.buffer import as_buffer
 from cudf.testing._utils import assert_eq
 
 if sys.version_info < (3, 8):
@@ -97,7 +97,7 @@ def test_pickle_index():
 
 def test_pickle_buffer():
     arr = np.arange(10).view("|u1")
-    buf = as_device_buffer_like(arr)
+    buf = as_buffer(arr)
     assert buf.size == arr.nbytes
     pickled = pickle.dumps(buf)
     unpacked = pickle.loads(pickled)
diff --git a/python/cudf/cudf/tests/test_testing.py b/python/cudf/cudf/tests/test_testing.py
index 60f01d567ef..c3dfeac9a3f 100644
--- a/python/cudf/cudf/tests/test_testing.py
+++ b/python/cudf/cudf/tests/test_testing.py
@@ -429,7 +429,7 @@ def test_assert_column_memory_slice(arrow_arrays):
 
 def test_assert_column_memory_basic_same(arrow_arrays):
     data = cudf.core.column.ColumnBase.from_arrow(arrow_arrays)
-    buf = cudf.core.buffer.as_device_buffer_like(data.base_data)
+    buf = cudf.core.buffer.as_buffer(data.base_data)
 
     left = cudf.core.column.build_column(buf, dtype=np.int32)
     right = cudf.core.column.build_column(buf, dtype=np.int32)
diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py
index 87596482d79..c5f4629483a 100644
--- a/python/cudf/cudf/utils/utils.py
+++ b/python/cudf/cudf/utils/utils.py
@@ -16,7 +16,7 @@
 import cudf
 import cudf.api.types
 from cudf.core import column
-from cudf.core.buffer import as_device_buffer_like
+from cudf.core.buffer import as_buffer
 
 # The size of the mask in bytes
 mask_dtype = cudf.api.types.dtype(np.int32)
@@ -293,8 +293,8 @@ def pa_mask_buffer_to_mask(mask_buf, size):
     if mask_buf.size < mask_size:
         dbuf = rmm.DeviceBuffer(size=mask_size)
         dbuf.copy_from_host(np.asarray(mask_buf).view("u1"))
-        return as_device_buffer_like(dbuf)
-    return as_device_buffer_like(mask_buf)
+        return as_buffer(dbuf)
+    return as_buffer(mask_buf)
 
 
 def _isnat(val):
diff --git a/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx b/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx
index 7a0cdeb10b9..db6e206843c 100644
--- a/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx
+++ b/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx
@@ -3,7 +3,7 @@
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
-from cudf.core.buffer import Buffer
+from cudf.core.buffer import as_buffer
 
 from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column cimport column_view
@@ -21,4 +21,4 @@ def to_string_view_array(Column strings_col):
         c_buffer = move(cpp_to_string_view_array(input_view))
 
     device_buffer = DeviceBuffer.c_from_unique_ptr(move(c_buffer))
-    return Buffer(device_buffer)
+    return as_buffer(device_buffer)

From a3d227684101a7161d5ce6b0595ef36a9a6bc36b Mon Sep 17 00:00:00 2001
From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com>
Date: Thu, 3 Nov 2022 00:00:17 +0530
Subject: [PATCH 113/202] Fixes bug in csv_reader_options construction in
 cython (#12021)

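Fixes a bug in the construction of csv_reader_options in the Cython code.
The false values for CSV were not passed to csv_reader_options during construction because the loop iterated over the output vector `c_false_values` instead of the input `false_values` argument. This is fixed and a unit test is added.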

Authors:
  - Karthikeyan (https://github.com/karthikeyann)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Lawrence Mitchell (https://github.com/wence-)

URL: https://github.com/rapidsai/cudf/pull/12021
---
 python/cudf/cudf/_lib/csv.pyx      |  2 +-
 python/cudf/cudf/tests/test_csv.py | 75 ++++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/_lib/csv.pyx b/python/cudf/cudf/_lib/csv.pyx
index 86808b8ba00..403b7acde5c 100644
--- a/python/cudf/cudf/_lib/csv.pyx
+++ b/python/cudf/cudf/_lib/csv.pyx
@@ -297,7 +297,7 @@ cdef csv_reader_options make_csv_reader_options(
 
     if false_values is not None:
         c_false_values.reserve(len(false_values))
-        for fv in c_false_values:
+        for fv in false_values:
             c_false_values.push_back(fv.encode())
         csv_reader_options_c.set_false_values(c_false_values)
 
diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py
index b91893d8991..e85d404d2c4 100644
--- a/python/cudf/cudf/tests/test_csv.py
+++ b/python/cudf/cudf/tests/test_csv.py
@@ -766,6 +766,81 @@ def test_csv_reader_bools(tmpdir, names, dtypes, data, trues, falses):
     assert_eq(df_out, out)
 
 
+def test_csv_reader_bools_custom():
+    names = ["text", "bool"]
+    dtypes = {"text": "str", "bool": "bool"}
+    trues = ["foo", "1"]
+    falses = ["bar", "0"]
+    lines = [
+        ",".join(names),
+        "true,true",
+        "false,false",
+        "foo,foo",
+        "bar,bar",
+        "0,0",
+        "1,1",
+    ]
+    buffer = "\n".join(lines)
+
+    df = read_csv(
+        StringIO(buffer),
+        names=names,
+        dtype=dtypes,
+        skiprows=1,
+        true_values=trues,
+        false_values=falses,
+    )
+
+    # Note: bool literals give parsing errors as int
+    # "0" and "1" give parsing errors as bool in pandas
+    expected = pd.read_csv(
+        StringIO(buffer),
+        names=names,
+        dtype=dtypes,
+        skiprows=1,
+        true_values=trues,
+        false_values=falses,
+    )
+    assert_eq(df, expected, check_dtype=True)
+
+
+def test_csv_reader_bools_NA():
+    names = ["text", "int"]
+    dtypes = ["str", "int"]
+    trues = ["foo"]
+    falses = ["bar"]
+    lines = [
+        ",".join(names),
+        "true,true",
+        "false,false",
+        "foo,foo",
+        "bar,bar",
+        "qux,qux",
+    ]
+
+    buffer = "\n".join(lines)
+
+    df = read_csv(
+        StringIO(buffer),
+        names=names,
+        dtype=dtypes,
+        skiprows=1,
+        true_values=trues,
+        false_values=falses,
+    )
+    assert len(df.columns) == 2
+    assert df["text"].dtype == np.dtype("object")
+    assert df["int"].dtype == np.dtype("int64")
+    expected = pd.DataFrame(
+        {
+            "text": ["true", "false", "foo", "bar", "qux"],
+            "int": [1, 0, 1, 0, 0],
+        }
+    )
+    # breaking behaviour is np.nan for qux
+    assert_eq(df, expected)
+
+
 def test_csv_quotednumbers(tmpdir):
     fname = tmpdir.mkdir("gdf_csv").join("tmp_csvreader_file12.csv")
 

From 49fc3c7530af1cf852cb80747fdf96eec627c755 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Wed, 2 Nov 2022 17:05:50 -0500
Subject: [PATCH 114/202] Enable CEC for `strings_udf` (#11884)

This PR removes the runtime checks for CUDA enhanced compatibility (CEC) in `strings_udf`.

Authors:
  - https://github.com/brandon-b-miller

Approvers:
  - Graham Markall (https://github.com/gmarkall)
  - Lawrence Mitchell (https://github.com/wence-)
  - Ray Douglass (https://github.com/raydouglass)

URL: https://github.com/rapidsai/cudf/pull/11884
---
 ci/gpu/build.sh                               |  17 +--
 conda/recipes/strings_udf/meta.yaml           |   2 +-
 python/cudf/cudf/core/udf/__init__.py         |  17 ++-
 python/cudf/cudf/core/udf/strings_utils.py    |   0
 python/strings_udf/strings_udf/__init__.py    | 137 +++++++++---------
 .../strings_udf/tests/test_string_udfs.py     |   3 -
 6 files changed, 81 insertions(+), 95 deletions(-)
 create mode 100644 python/cudf/cudf/core/udf/strings_utils.py

diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh
index 4d86d19a7d2..7d67efa77b1 100755
--- a/ci/gpu/build.sh
+++ b/ci/gpu/build.sh
@@ -295,22 +295,15 @@ py.test -n 8 --cache-clear --basetemp="$WORKSPACE/custreamz-cuda-tmp" --junitxml
 gpuci_logger "Installing strings_udf"
 gpuci_mamba_retry install strings_udf -c "${CONDA_BLD_DIR}" -c "${CONDA_ARTIFACT_PATH}"
 
-# only install strings_udf after cuDF is finished testing without its presence
 cd "$WORKSPACE/python/strings_udf/strings_udf"
 gpuci_logger "Python py.test for strings_udf"
+py.test -n 8 --cache-clear --basetemp="$WORKSPACE/strings-udf-cuda-tmp" --junitxml="$WORKSPACE/junit-strings-udf.xml" -v --cov-config=.coveragerc --cov=strings_udf --cov-report=xml:"$WORKSPACE/python/strings_udf/strings-udf-coverage.xml" --cov-report term tests
 
-STRINGS_UDF_PYTEST_RETCODE=0
-py.test -n 8 --cache-clear --basetemp="$WORKSPACE/strings-udf-cuda-tmp" --junitxml="$WORKSPACE/junit-strings-udf.xml" -v --cov-config=.coveragerc --cov=strings_udf --cov-report=xml:"$WORKSPACE/python/strings_udf/strings-udf-coverage.xml" --cov-report term tests || STRINGS_UDF_PYTEST_RETCODE=$?
+# retest cuDF UDFs
+cd "$WORKSPACE/python/cudf/cudf"
+gpuci_logger "Python py.test retest cuDF UDFs"
+py.test -n 8 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-strings-udf-tmp" --ignore="$WORKSPACE/python/cudf/cudf/benchmarks" --junitxml="$WORKSPACE/junit-cudf-strings-udf.xml" -v --cov-config="$WORKSPACE/python/cudf/.coveragerc" --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-strings-udf-coverage.xml" --cov-report term --dist=loadscope tests/test_udf_masked_ops.py
 
-if [ ${STRINGS_UDF_PYTEST_RETCODE} -eq 5 ]; then
-    echo "No strings UDF tests were run, but this script will continue to execute."
-elif [ ${STRINGS_UDF_PYTEST_RETCODE} -ne 0 ]; then
-    exit ${STRINGS_UDF_PYTEST_RETCODE}
-else
-    cd "$WORKSPACE/python/cudf/cudf"
-    gpuci_logger "Python py.test retest cuDF UDFs"
-    py.test -n 8 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-strings-udf-tmp" --ignore="$WORKSPACE/python/cudf/cudf/benchmarks" --junitxml="$WORKSPACE/junit-cudf-strings-udf.xml" -v --cov-config="$WORKSPACE/python/cudf/.coveragerc" --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-strings-udf-coverage.xml" --cov-report term --dist=loadscope tests
-fi
 
 # Run benchmarks with both cudf and pandas to ensure compatibility is maintained.
 # Benchmarks are run in DEBUG_ONLY mode, meaning that only small data sizes are used.
diff --git a/conda/recipes/strings_udf/meta.yaml b/conda/recipes/strings_udf/meta.yaml
index a736edef24d..027a8a82aae 100644
--- a/conda/recipes/strings_udf/meta.yaml
+++ b/conda/recipes/strings_udf/meta.yaml
@@ -50,7 +50,7 @@ requirements:
     - cudf ={{ version }}
     - {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }}
     - cachetools
-    - ptxcompiler  # [linux64]  # CUDA enhanced compatibility. See https://github.com/rapidsai/ptxcompiler
+    - ptxcompiler >=0.7.0  # [linux64]  # CUDA enhanced compatibility. See https://github.com/rapidsai/ptxcompiler
 test:                                   # [linux64]
   requires:                             # [linux64]
     - cudatoolkit {{ cuda_version }}.*  # [linux64]
diff --git a/python/cudf/cudf/core/udf/__init__.py b/python/cudf/cudf/core/udf/__init__.py
index 443466b28bd..4730f1fa296 100644
--- a/python/cudf/cudf/core/udf/__init__.py
+++ b/python/cudf/cudf/core/udf/__init__.py
@@ -23,17 +23,20 @@
     | {types.boolean}
 )
 _STRING_UDFS_ENABLED = False
+
 try:
     import strings_udf
+    from strings_udf import ptxpath
+
+    if ptxpath:
+        utils.ptx_files.append(ptxpath)
 
-    if strings_udf.ENABLED:
-        from . import strings_typing  # isort: skip
-        from . import strings_lowering  # isort: skip
-        from strings_udf import ptxpath
         from strings_udf._lib.cudf_jit_udf import to_string_view_array
         from strings_udf._typing import str_view_arg_handler, string_view
 
-        # add an overload of MaskedType.__init__(string_view, bool)
+        from . import strings_typing  # isort: skip
+        from . import strings_lowering  # isort: skip
+
         cuda_lower(api.Masked, strings_typing.string_view, types.boolean)(
             masked_lowering.masked_constructor
         )
@@ -47,13 +50,11 @@
         utils.launch_arg_getters[dtype("O")] = to_string_view_array
         utils.masked_array_types[dtype("O")] = string_view
         utils.JIT_SUPPORTED_TYPES |= STRING_TYPES
-        utils.ptx_files.append(ptxpath)
+
         utils.arg_handlers.append(str_view_arg_handler)
         row_function.itemsizes[dtype("O")] = string_view.size_bytes
 
         _STRING_UDFS_ENABLED = True
-    else:
-        del strings_udf
 
 except ImportError as e:
     # allow cuDF to work without strings_udf
diff --git a/python/cudf/cudf/core/udf/strings_utils.py b/python/cudf/cudf/core/udf/strings_utils.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/python/strings_udf/strings_udf/__init__.py b/python/strings_udf/strings_udf/__init__.py
index 2cedc0288d1..24f1a2d3bda 100644
--- a/python/strings_udf/strings_udf/__init__.py
+++ b/python/strings_udf/strings_udf/__init__.py
@@ -1,36 +1,20 @@
 # Copyright (c) 2022, NVIDIA CORPORATION.
 import glob
 import os
-import re
-import subprocess
-import sys
 
+from cubinlinker.patch import _numba_version_ok, get_logger, new_patched_linker
 from numba import cuda
-from ptxcompiler.patch import CMD
+from numba.cuda.cudadrv.driver import Linker
+from ptxcompiler.patch import NO_DRIVER, safe_get_versions
 
 from . import _version
 
 __version__ = _version.get_versions()["version"]
 
-ENABLED = False
+logger = get_logger()
 
-
-def compiler_from_ptx_file(path):
-    """Parse a PTX file header and extract the CUDA version used to compile it.
-    Here is an example PTX header that this function should parse:
-    // Generated by NVIDIA NVVM Compiler
-    //
-    // Compiler Build ID: CL-30672275
-    // Cuda compilation tools, release 11.5, V11.5.119
-    // Based on NVVM 7
-    """
-    file = open(path).read()
-    major, minor = (
-        re.search(r"Cuda compilation tools, release ([0-9\.]+)", file)
-        .group(1)
-        .split(".")
-    )
-    return int(major), int(minor)
+# tracks the version of CUDA used to build the c++ and PTX components
+STRINGS_UDF_PTX_VERSION = (11, 5)
 
 
 def _get_appropriate_file(sms, cc):
@@ -41,59 +25,70 @@ def _get_appropriate_file(sms, cc):
         return None
 
 
-# adapted from PTXCompiler
-cp = subprocess.run([sys.executable, "-c", CMD], capture_output=True)
-if cp.returncode == 0:
-    # must have a driver to proceed
-    versions = [int(s) for s in cp.stdout.strip().split()]
-    driver_version = tuple(versions[:2])
-    runtime_version = tuple(versions[2:])
+def maybe_patch_numba_linker(driver_version):
+    # Numba assumes cubinlinker is only needed when the driver is older than
+    # the CUDA toolkit (ctk). When strings_udf is present, patching may also
+    # be needed because the PTX file it relies on may be newer than the driver.
+    if driver_version < STRINGS_UDF_PTX_VERSION:
+        logger.debug(
+            "Driver version %s.%s needs patching due to strings_udf"
+            % driver_version
+        )
+        if _numba_version_ok:
+            logger.debug("Patching Numba Linker")
+            Linker.new = new_patched_linker
+        else:
+            logger.debug("Cannot patch Numba Linker - unsupported version")
+
+
+def _get_ptx_file():
+    if "RAPIDS_NO_INITIALIZE" in os.environ:
+        cc = int(os.environ.get("STRINGS_UDF_CC", "52"))
+    else:
+        dev = cuda.get_current_device()
 
-    # CUDA enhanced compatibility not yet enabled
-    if driver_version >= runtime_version:
         # Load the highest compute capability file available that is less than
         # the current device's.
-        dev = cuda.get_current_device()
         cc = int("".join(str(x) for x in dev.compute_capability))
-        files = glob.glob(
-            os.path.join(os.path.dirname(__file__), "shim_*.ptx")
+    files = glob.glob(os.path.join(os.path.dirname(__file__), "shim_*.ptx"))
+    if len(files) == 0:
+        raise RuntimeError(
+            "This strings_udf installation is missing the necessary PTX "
+            f"files for compute capability {cc}. "
+            "Please file an issue reporting this error and how you "
+            "installed cudf and strings_udf. "
+            "https://github.com/rapidsai/cudf/issues"
         )
-        if len(files) == 0:
-            raise RuntimeError(
-                "This strings_udf installation is missing the necessary PTX "
-                "files. Please file an issue reporting this error and how you "
-                "installed cudf and strings_udf."
-            )
-
-        suffix_a_sm = None
-        regular_sms = []
-
-        for f in files:
-            file_name = os.path.basename(f)
-            sm_number = file_name.rstrip(".ptx").lstrip("shim_")
-            if sm_number.endswith("a"):
-                processed_sm_number = int(sm_number.rstrip("a"))
-                if processed_sm_number == cc:
-                    suffix_a_sm = (processed_sm_number, f)
-            else:
-                regular_sms.append((int(sm_number), f))
-
-        regular_result = None
-
-        if regular_sms:
-            regular_result = _get_appropriate_file(regular_sms, cc)
-
-        if suffix_a_sm is None and regular_result is None:
-            raise RuntimeError(
-                "This strings_udf installation is missing the necessary PTX "
-                f"files that are <={cc}."
-            )
-        elif suffix_a_sm is not None:
-            ptxpath = suffix_a_sm[1]
-        else:
-            ptxpath = regular_result[1]
 
-        if driver_version >= compiler_from_ptx_file(ptxpath):
-            ENABLED = True
+    regular_sms = []
+
+    for f in files:
+        file_name = os.path.basename(f)
+        sm_number = file_name.rstrip(".ptx").lstrip("shim_")
+        if sm_number.endswith("a"):
+            processed_sm_number = int(sm_number.rstrip("a"))
+            if processed_sm_number == cc:
+                return f
         else:
-            del ptxpath
+            regular_sms.append((int(sm_number), f))
+
+    regular_result = None
+
+    if regular_sms:
+        regular_result = _get_appropriate_file(regular_sms, cc)
+
+    if regular_result is None:
+        raise RuntimeError(
+            "This strings_udf installation is missing the necessary PTX "
+            f"files that are <={cc}."
+        )
+    else:
+        return regular_result[1]
+
+
+ptxpath = None
+versions = safe_get_versions()
+if versions != NO_DRIVER:
+    driver_version, runtime_version = versions
+    maybe_patch_numba_linker(driver_version)
+    ptxpath = _get_ptx_file()
diff --git a/python/strings_udf/strings_udf/tests/test_string_udfs.py b/python/strings_udf/strings_udf/tests/test_string_udfs.py
index f214915ae12..1a5dfa00dd7 100644
--- a/python/strings_udf/strings_udf/tests/test_string_udfs.py
+++ b/python/strings_udf/strings_udf/tests/test_string_udfs.py
@@ -15,9 +15,6 @@
 from strings_udf._lib.cudf_jit_udf import to_string_view_array
 from strings_udf._typing import str_view_arg_handler, string_view
 
-if not strings_udf.ENABLED:
-    pytest.skip("Strings UDF not enabled.", allow_module_level=True)
-
 
 def get_kernel(func, dtype):
     """

From 856ac3fac50d4d8609ab8637e51c191df6757d9f Mon Sep 17 00:00:00 2001
From: Ed Seidl <etseidl@users.noreply.github.com>
Date: Wed, 2 Nov 2022 15:10:48 -0700
Subject: [PATCH 115/202] Add full page indexes to Parquet writer benchmarks
 (#11955)

Adds `statistics_freq::STATISTICS_COLUMN` to list of parquet writer options to benchmark.  This should have been included in #11302.

Authors:
  - Ed Seidl (https://github.com/etseidl)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Karthikeyan (https://github.com/karthikeyann)

URL: https://github.com/rapidsai/cudf/pull/11955
---
 cpp/benchmarks/io/parquet/parquet_writer.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/cpp/benchmarks/io/parquet/parquet_writer.cpp b/cpp/benchmarks/io/parquet/parquet_writer.cpp
index 753ffbd00c9..a0b076abfda 100644
--- a/cpp/benchmarks/io/parquet/parquet_writer.cpp
+++ b/cpp/benchmarks/io/parquet/parquet_writer.cpp
@@ -32,6 +32,7 @@ NVBENCH_DECLARE_ENUM_TYPE_STRINGS(
       case cudf::io::statistics_freq::STATISTICS_NONE: return "STATISTICS_NONE";
       case cudf::io::statistics_freq::STATISTICS_ROWGROUP: return "STATISTICS_ROWGROUP";
       case cudf::io::statistics_freq::STATISTICS_PAGE: return "STATISTICS_PAGE";
+      case cudf::io::statistics_freq::STATISTICS_COLUMN: return "STATISTICS_COLUMN";
       default: return "Unknown";
     }
   },
@@ -201,6 +202,7 @@ using compression_list =
 
 using stats_list = nvbench::enum_type_list<cudf::io::STATISTICS_NONE,
                                            cudf::io::STATISTICS_ROWGROUP,
+                                           cudf::io::STATISTICS_COLUMN,
                                            cudf::io::STATISTICS_PAGE>;
 
 NVBENCH_BENCH_TYPES(BM_parq_write_encode, NVBENCH_TYPE_AXES(d_type_list))

From d949cd2336bd0e613f2f103bea9ad088d38c1142 Mon Sep 17 00:00:00 2001
From: Paul Taylor <paul.e.taylor@me.com>
Date: Wed, 2 Nov 2022 15:34:00 -0700
Subject: [PATCH 116/202] Make all `nvcc` warnings into errors (#8916)

Seeing what impact [`-Werror=all-warnings`](https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#generic-tool-options-Werror) has on device-side compilation.

Device warnings now treated as errors:
```
cudf/cpp/src/io/orc/stripe_enc.cu (633): error: dynamic initialization is not supported for a function-scope static __shared__ variable within a __device__/__global__ function

cudf/cpp/src/io/orc/writer_impl.cu
ptxas error   : Stack size for entry function '_ZN4cudf6detail20single_thread_kernelIZNS_2io6detail3orc19make_orc_table_viewERKNS_10table_viewERKNS_17table_device_viewEPKNS2_14table_metadataEN3rmm16cuda_stream_viewEEUlvE_EEvT_' cannot be statically determined

cudf/cpp/src/binaryop/compiled/binary_ops.cu(46): error: parameter "mr" was declared but never referenced

cudf/cpp/src/binaryop/compiled/binary_ops.cu(204): error: variable "out" was declared but never referenced
```

Authors:
  - Paul Taylor (https://github.com/trxcllnt)
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Jake Hemstad (https://github.com/jrhemstad)
  - Bradley Dice (https://github.com/bdice)
  - Robert Maynard (https://github.com/robertmaynard)

URL: https://github.com/rapidsai/cudf/pull/8916
---
 cpp/CMakeLists.txt                    | 1 +
 cpp/cmake/Modules/ConfigureCUDA.cmake | 6 +++++-
 cpp/include/cudf/detail/interop.hpp   | 6 ++++++
 cpp/include/cudf/interop.hpp          | 5 +++++
 cpp/include/cudf/io/datasource.hpp    | 7 +++++++
 5 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 289c432dea5..d63c7e75616 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -66,6 +66,7 @@ option(DISABLE_DEPRECATION_WARNINGS "Disable warnings generated from deprecated
 option(CUDA_ENABLE_LINEINFO
        "Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler" OFF
 )
+option(CUDA_WARNINGS_AS_ERRORS "Enable -Werror=all-warnings for all CUDA compilation" ON)
 # cudart can be statically linked or dynamically linked. The python ecosystem wants dynamic linking
 option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF)
 
diff --git a/cpp/cmake/Modules/ConfigureCUDA.cmake b/cpp/cmake/Modules/ConfigureCUDA.cmake
index 9ee4d3e411c..f79e4c37228 100644
--- a/cpp/cmake/Modules/ConfigureCUDA.cmake
+++ b/cpp/cmake/Modules/ConfigureCUDA.cmake
@@ -19,7 +19,11 @@ endif()
 list(APPEND CUDF_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr)
 
 # set warnings as errors
-list(APPEND CUDF_CUDA_FLAGS -Werror=cross-execution-space-call)
+if(CUDA_WARNINGS_AS_ERRORS)
+  list(APPEND CUDF_CUDA_FLAGS -Werror=all-warnings)
+else()
+  list(APPEND CUDF_CUDA_FLAGS -Werror=cross-execution-space-call)
+endif()
 list(APPEND CUDF_CUDA_FLAGS -Xcompiler=-Wall,-Werror,-Wno-error=deprecated-declarations)
 
 if(DISABLE_DEPRECATION_WARNINGS)
diff --git a/cpp/include/cudf/detail/interop.hpp b/cpp/include/cudf/detail/interop.hpp
index 2215f052113..5a5bbe7f683 100644
--- a/cpp/include/cudf/detail/interop.hpp
+++ b/cpp/include/cudf/detail/interop.hpp
@@ -16,7 +16,13 @@
 
 #pragma once
 
+// We disable warning 611 because `arrow::TableBatchReader` only partially
+// overrides the `ReadNext` method of `arrow::RecordBatchReader`,
+// triggering warning 611-D from nvcc.
+#pragma nv_diag_suppress 611
 #include <arrow/api.h>
+#pragma nv_diag_default 611
+
 #include <cudf/interop.hpp>
 #include <cudf/utilities/default_stream.hpp>
 #include <cudf/utilities/error.hpp>
diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp
index 016e23688c7..c1c58f136d6 100644
--- a/cpp/include/cudf/interop.hpp
+++ b/cpp/include/cudf/interop.hpp
@@ -16,7 +16,12 @@
 
 #pragma once
 
+// We disable warning 611 because `arrow::TableBatchReader` only partially
+// overrides the `ReadNext` method of `arrow::RecordBatchReader`,
+// triggering warning 611-D from nvcc.
+#pragma nv_diag_suppress 611
 #include <arrow/api.h>
+#pragma nv_diag_default 611
 
 #include <cudf/column/column.hpp>
 #include <cudf/detail/transform.hpp>
diff --git a/cpp/include/cudf/io/datasource.hpp b/cpp/include/cudf/io/datasource.hpp
index 907830de2bb..251a93ac21f 100644
--- a/cpp/include/cudf/io/datasource.hpp
+++ b/cpp/include/cudf/io/datasource.hpp
@@ -22,8 +22,15 @@
 #include <rmm/cuda_stream_view.hpp>
 
 #include <arrow/buffer.h>
+
+// We disable warning 611 because some Arrow subclasses of
+// `arrow::fs::FileSystem` only partially override the `Equals` method,
+// triggering warning 611-D from nvcc.
+#pragma nv_diag_suppress 611
 #include <arrow/filesystem/filesystem.h>
 #include <arrow/filesystem/s3fs.h>
+#pragma nv_diag_default 611
+
 #include <arrow/io/file.h>
 #include <arrow/io/interfaces.h>
 #include <arrow/io/memory.h>

From eaa0706ef29c7b5dda588b84d1dee26abb4d8e3a Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 2 Nov 2022 17:34:07 -0700
Subject: [PATCH 117/202] Add developer docs for writing tests (#11199)

This PR adds documentation on how Python tests should be written.

Related to #4730. This PR will establish best practices. Follow-up PRs will be needed to implement them.

Resolves #6481.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Matthew Roeschke (https://github.com/mroeschke)
  - Lawrence Mitchell (https://github.com/wence-)
  - Ashwin Srinath (https://github.com/shwina)

URL: https://github.com/rapidsai/cudf/pull/11199
---
 docs/cudf/source/developer_guide/index.md   |   1 +
 docs/cudf/source/developer_guide/testing.md | 150 ++++++++++++++++++++
 2 files changed, 151 insertions(+)
 create mode 100644 docs/cudf/source/developer_guide/testing.md

diff --git a/docs/cudf/source/developer_guide/index.md b/docs/cudf/source/developer_guide/index.md
index 04f2bf6b8c0..4c4f4c0ad49 100644
--- a/docs/cudf/source/developer_guide/index.md
+++ b/docs/cudf/source/developer_guide/index.md
@@ -23,6 +23,7 @@ Additionally, it includes longer sections on more specific topics like testing a
 library_design
 contributing_guide
 documentation
+testing
 benchmarking
 options
 ```
diff --git a/docs/cudf/source/developer_guide/testing.md b/docs/cudf/source/developer_guide/testing.md
new file mode 100644
index 00000000000..e64e5fbd0a9
--- /dev/null
+++ b/docs/cudf/source/developer_guide/testing.md
@@ -0,0 +1,150 @@
+# Testing cuDF
+
+## Tooling
+Tests in cuDF are written using [`pytest`](https://docs.pytest.org/en/latest/).
+Test coverage is measured using [`coverage.py`](https://coverage.readthedocs.io/en/latest/),
+specifically the [`pytest-cov`](https://github.com/pytest-dev/pytest-cov) plugin.
+Code coverage reports are uploaded to [Codecov](https://app.codecov.io/gh/rapidsai/cudf).
+Each PR also indicates whether it increases or decreases test coverage.
+
+## Test organization
+
+How tests are organized depends on which of the following two groups they fall into:
+
+1. Free functions such as `cudf.merge` that operate on classes like `DataFrame` or `Series`.
+2. Methods of the above classes.
+
+Tests of free functions should be grouped into files based on the
+[API sections in the documentation](https://docs.rapids.ai/api/cudf/latest/api_docs/index.html).
+This places tests of similar functionality in the same module.
+Tests of class methods should be organized in the same way, except that this organization should be within a subdirectory corresponding to the class.
+For instance, tests of `DataFrame` indexing should be placed into `dataframe/test_indexing.py`.
+In cases where tests may be shared by multiple classes sharing a common parent (e.g. `DataFrame` and `Series` both require `IndexedFrame` tests),
+the tests may be placed in a directory corresponding to the parent class.
+
+## Test contents
+
+### Writing tests
+
+In general, functionality must be tested for both standard and exceptional cases.
+Standard use cases may be covered using parametrization (using `pytest.mark.parametrize`).
+Tests of standard use cases should typically include some coverage of:
+- Different dtypes, including nested dtypes (especially strings)
+- Mixed objects, e.g. binary operations between `DataFrame` and `Series`
+- Operations on scalars
+- Verifying all combinations of parameters for complex APIs like `cudf.merge`.
+
+Here are some of the most common exceptional cases to test:
+1. `Series`/`DataFrame`/`Index` with zero rows
+2. `DataFrame` with zero columns
+3. All null data
+4. For string or list APIs, empty strings/lists
+5. For list APIs, lists containing all null elements or empty strings
+6. For numeric data:
+  1. All 0s.
+  2. All 1s.
+  3. Containing/all inf
+  4. Containing/all nan
+  5. `INT${PRECISION}_MAX` for a given precision (e.g. `2**31 - 1` for `int32`).
+
+Most specific APIs will also include a range of other cases.
+
+In general, it is preferable to write separate tests for different exceptional cases.
+Excessive parametrization and branching increase complexity and obfuscate the purpose of a test.
+Typically, exception cases require specific assertions or other special logic, so they are best kept separate.
+The main exception to this rule is tests based on comparison to pandas.
+Such tests may test exceptional cases alongside more typical cases since the logic is generally identical.
+
+### Parametrization: custom fixtures and `pytest.mark.parametrize`
+
+When it comes to parametrizing tests written with `pytest`,
+the two main options are [fixtures](https://docs.pytest.org/en/latest/explanation/fixtures.html)
+and [`mark.parametrize`](https://docs.pytest.org/en/latest/how-to/parametrize.html#pytest-mark-parametrize).
+By virtue of being functions, fixtures are both more verbose and more self-documenting.
+Fixtures also have the significant benefit of being constructed lazily,
+whereas parametrizations are constructed at test collection time.
+
+In general, these approaches are applicable to parametrizations of different complexity.
+For the purpose of this discussion,
+we define a parametrization as "simple" if it is composed of a list (possibly nested) of primitive objects.
+Examples include a list of integers or a list of lists of strings.
+This _does not_ include e.g. cuDF or pandas objects.
+In particular, developers should avoid performing GPU memory allocations during test collection.
+
+With that in mind, here are some ground rules for how to parametrize.
+
+Use `pytest.mark.parametrize` when:
+- One test must be run on many inputs and those inputs are simple to construct.
+
+Use fixtures when:
+- One or more tests must be run on the same set of inputs,
+  and all of those inputs can be constructed with simple parametrizations.
+  In practice, that means that it is acceptable to use a fixture like this:
+  ```python
+      @pytest.fixture(params=["a", "b"])
+      def foo(request):
+          if request.param == "a":
+              # Some complex initialization
+          elif request.param == "b":
+              # Some other complex initialization
+  ```
+  In other words, the construction of the fixture may be complex,
+  as long as the parametrization of that construction is simple.
+- One or more tests must be run on the same set of inputs,
+  and at least one of those inputs requires complex parametrizations.
+  In this case, the parametrization of a fixture should be decomposed
+  by using fixtures that depend on other fixtures.
+  ```python
+      @pytest.fixture(params=["a", "b"])
+      def foo(request):
+          if request.param == "a":
+              # Some complex initialization
+          elif request.param == "b":
+              # Some other complex initialization
+
+      @pytest.fixture
+      def bar(foo):
+         # do something with foo like initialize a cudf object.
+
+      def test_some_property(bar):
+          # will be run for each value of bar that results from each value of foo.
+          assert some_property_of(bar)
+  ```
+
+#### Complex parametrizations
+
+The lists above document common use cases.
+However, more complex cases may arise.
+One of the most common alternatives is where, given a set of test cases,
+different tests need to run on different subsets with a nonempty intersection.
+Fixtures and parametrization are only capable of handling the Cartesian product of parameters,
+i.e. "run this test for all values of `a` and all values of `b`".
+
+There are multiple potential solutions to this problem.
+One possibility is to encapsulate common test logic in a helper function,
+then call it from multiple `test_*` functions that construct the necessary inputs.
+Another possibility is to use functions rather than fixtures to construct inputs, allowing for more flexible input construction:
+```python
+def get_values(predicate):
+    values = range(10)
+    yield from filter(predicate, values)
+
+def test_evens():
+    for v in get_values(lambda x: x % 2 == 0):
+        # Execute test
+
+def test_odds():
+    for v in get_values(lambda x: x % 2 == 1):
+        # Execute test
+```
+
+Other approaches are also possible, and the best solution should be discussed on a case-by-case basis during PR review.
+
+### Testing utility functions
+
+The `cudf.testing` subpackage provides a handful of utilities for testing the equality of objects.
+The internal `cudf.testing._utils` module provides additional helper functions for use in tests.
+In particular:
+- `testing._utils.assert_eq` is the biggest hammer to reach for. It can be used to compare any pair of objects.
+- For comparing specific objects, use `testing.testing.assert_[frame|series|index]_equal`.
+- For verifying that the expected exceptions are raised, use `testing._utils.assert_exceptions_equal`.

From e402448bd00a841e0f1600b432b220dcdd513abf Mon Sep 17 00:00:00 2001
From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com>
Date: Thu, 3 Nov 2022 12:25:21 +0530
Subject: [PATCH 118/202] Trim quotes for non-string values in nested json
 parsing (#11898)

Trim quotes for non-string values in nested json parsing
Added corner cases for unquoted and quoted literals. (Review the unit test)
Fixes old json reader to treat `"null"` as string instead of NULL.

closes #11817

Authors:
  - Karthikeyan (https://github.com/karthikeyann)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/11898
---
 cpp/include/cudf/io/detail/data_casting.cuh |  7 +-
 cpp/src/io/json/json_gpu.cu                 | 30 +++++--
 cpp/src/io/utilities/parsing_utils.cuh      | 50 ++++++++++-
 python/cudf/cudf/tests/test_json.py         | 95 +++++++++++++++++++++
 4 files changed, 169 insertions(+), 13 deletions(-)

diff --git a/cpp/include/cudf/io/detail/data_casting.cuh b/cpp/include/cudf/io/detail/data_casting.cuh
index 628c00ad603..aba9ec07bc6 100644
--- a/cpp/include/cudf/io/detail/data_casting.cuh
+++ b/cpp/include/cudf/io/detail/data_casting.cuh
@@ -391,10 +391,13 @@ std::unique_ptr<column> parse_data(str_tuple_it str_tuples,
         return;
       }
 
+      // If this is a string value, remove quotes
+      auto [in_begin, in_end] = trim_quotes(in.first, in.first + in.second, options.quotechar);
+
       auto const is_parsed = cudf::type_dispatcher(col_type,
                                                    ConvertFunctor{},
-                                                   in.first,
-                                                   in.first + in.second,
+                                                   in_begin,
+                                                   in_end,
                                                    col.data<char>(),
                                                    row,
                                                    col_type,
diff --git a/cpp/src/io/json/json_gpu.cu b/cpp/src/io/json/json_gpu.cu
index dbfcca7d37a..8b6c0f9d528 100644
--- a/cpp/src/io/json/json_gpu.cu
+++ b/cpp/src/io/json/json_gpu.cu
@@ -156,6 +156,7 @@ struct field_descriptor {
   cudf::size_type column;
   char const* value_begin;
   char const* value_end;
+  bool is_quoted;
 };
 
 /**
@@ -178,7 +179,10 @@ __device__ field_descriptor next_field_descriptor(const char* begin,
   auto const desc_pre_trim =
     col_map.capacity() == 0
       // No key - column and begin are trivial
-      ? field_descriptor{field_idx, begin, cudf::io::gpu::seek_field_end(begin, end, opts, true)}
+      ? field_descriptor{field_idx,
+                         begin,
+                         cudf::io::gpu::seek_field_end(begin, end, opts, true),
+                         false}
       : [&]() {
           auto const key_range = get_next_key(begin, end, opts.quotechar);
           auto const key_hash  = cudf::detail::MurmurHash3_32<cudf::string_view>{}(
@@ -189,14 +193,23 @@ __device__ field_descriptor next_field_descriptor(const char* begin,
 
           // Skip the colon between the key and the value
           auto const value_begin = thrust::find(thrust::seq, key_range.second, end, ':') + 1;
-          return field_descriptor{
-            column, value_begin, cudf::io::gpu::seek_field_end(value_begin, end, opts, true)};
+          return field_descriptor{column,
+                                  value_begin,
+                                  cudf::io::gpu::seek_field_end(value_begin, end, opts, true),
+                                  false};
         }();
 
   // Modify start & end to ignore whitespace and quotechars
   auto const trimmed_value_range =
-    trim_whitespaces_quotes(desc_pre_trim.value_begin, desc_pre_trim.value_end, opts.quotechar);
-  return {desc_pre_trim.column, trimmed_value_range.first, trimmed_value_range.second};
+    trim_whitespaces(desc_pre_trim.value_begin, desc_pre_trim.value_end);
+  bool const is_quoted =
+    thrust::distance(trimmed_value_range.first, trimmed_value_range.second) >= 2 and
+    *trimmed_value_range.first == opts.quotechar and
+    *thrust::prev(trimmed_value_range.second) == opts.quotechar;
+  return {desc_pre_trim.column,
+          trimmed_value_range.first + static_cast<std::ptrdiff_t>(is_quoted),
+          trimmed_value_range.second - static_cast<std::ptrdiff_t>(is_quoted),
+          is_quoted};
 }
 
 /**
@@ -255,13 +268,14 @@ __global__ void convert_data_to_columns_kernel(parse_options_view opts,
     auto const desc =
       next_field_descriptor(current, row_data_range.second, opts, input_field_index, col_map);
     auto const value_len = static_cast<size_t>(std::max(desc.value_end - desc.value_begin, 0L));
+    auto const is_quoted = static_cast<std::ptrdiff_t>(desc.is_quoted);
 
     current = desc.value_end + 1;
 
     using string_index_pair = thrust::pair<const char*, size_type>;
 
-    // Empty fields are not legal values
-    if (!serialized_trie_contains(opts.trie_na, {desc.value_begin, value_len})) {
+    if (!serialized_trie_contains(opts.trie_na,
+                                  {desc.value_begin - is_quoted, value_len + is_quoted * 2})) {
       // Type dispatcher does not handle strings
       if (column_types[desc.column].id() == type_id::STRING) {
         auto str_list           = static_cast<string_index_pair*>(output_columns[desc.column]);
@@ -345,7 +359,7 @@ __global__ void detect_data_types_kernel(
       atomicAdd(&column_infos[desc.column].null_count, -1);
     }
     // Don't need counts to detect strings, any field in quotes is deduced to be a string
-    if (*(desc.value_begin - 1) == opts.quotechar && *desc.value_end == opts.quotechar) {
+    if (desc.is_quoted) {
       atomicAdd(&column_infos[desc.column].string_count, 1);
       continue;
     }
diff --git a/cpp/src/io/utilities/parsing_utils.cuh b/cpp/src/io/utilities/parsing_utils.cuh
index 388c9b28001..cbd417c2b5b 100644
--- a/cpp/src/io/utilities/parsing_utils.cuh
+++ b/cpp/src/io/utilities/parsing_utils.cuh
@@ -503,9 +503,9 @@ __inline__ __device__ It skip_character(It const& it, char ch)
 /**
  * @brief Adjusts the range to ignore starting/trailing whitespace and quotation characters.
  *
- * @param[in] begin Pointer to the first character in the parsing range
- * @param[in] end pointer to the first character after the parsing range
- * @param[in] quotechar The character used to denote quotes; '\0' if none
+ * @param begin Pointer to the first character in the parsing range
+ * @param end Pointer to the first character after the parsing range
+ * @param quotechar The character used to denote quotes; '\0' if none
  *
  * @return Trimmed range
  */
@@ -523,6 +523,50 @@ __inline__ __device__ std::pair<char const*, char const*> trim_whitespaces_quote
   return {skip_character(trim_begin, quotechar), skip_character(trim_end, quotechar).base()};
 }
 
+/**
+ * @brief Adjusts the range to ignore starting/trailing whitespace characters.
+ *
+ * @param begin Pointer to the first character in the parsing range
+ * @param end Pointer to the first character after the parsing range
+ *
+ * @return Trimmed range
+ */
+__inline__ __device__ std::pair<char const*, char const*> trim_whitespaces(char const* begin,
+                                                                           char const* end)
+{
+  auto not_whitespace = [] __device__(auto c) { return !is_whitespace(c); };
+
+  auto const trim_begin = thrust::find_if(thrust::seq, begin, end, not_whitespace);
+  auto const trim_end   = thrust::find_if(thrust::seq,
+                                        thrust::make_reverse_iterator(end),
+                                        thrust::make_reverse_iterator(trim_begin),
+                                        not_whitespace);
+
+  return {trim_begin, trim_end.base()};
+}
+
+/**
+ * @brief Strips one pair of enclosing quotation characters if the range both starts and ends with one.
+ *
+ * @param begin Pointer to the first character in the parsing range
+ * @param end Pointer to the first character after the parsing range
+ * @param quotechar The character used to denote quotes. Provide '\0' if no quotes should be
+ * trimmed.
+ *
+ * @return Trimmed range
+ */
+__inline__ __device__ std::pair<char const*, char const*> trim_quotes(char const* begin,
+                                                                      char const* end,
+                                                                      char quotechar)
+{
+  if ((thrust::distance(begin, end) >= 2 && *begin == quotechar &&
+       *thrust::prev(end) == quotechar)) {
+    thrust::advance(begin, 1);
+    thrust::advance(end, -1);
+  }
+  return {begin, end};
+}
+
 /**
  * @brief Decodes a numeric value base on templated cudf type T with specified
  * base.
diff --git a/python/cudf/cudf/tests/test_json.py b/python/cudf/cudf/tests/test_json.py
index fb2c24b3757..00d6e0b2899 100644
--- a/python/cudf/cudf/tests/test_json.py
+++ b/python/cudf/cudf/tests/test_json.py
@@ -687,6 +687,101 @@ def test_json_types_data():
     assert df.to_arrow().equals(pa_table_pdf)
 
 
+@pytest.mark.parametrize(
+    "col_type,json_str",
+    [
+        # without quotes
+        ("int", '[{"k": 1}, {"k": 2}, {"k": 3}, {"k": 4}]'),
+        # with quotes
+        ("int", '[{"k": "1"}, {"k": "2"}]'),
+        # with quotes, mixed
+        ("int", '[{"k": "1"}, {"k": "2"}, {"k": 3}, {"k": 4}]'),
+        # with quotes, null, mixed
+        ("int", '[{"k": "1"}, {"k": "2"}, {"k": null}, {"k": 4}]'),
+        # without quotes, null
+        ("int", '[{"k": 1}, {"k": 2}, {"k": null}, {"k": 4}]'),
+        # without quotes
+        ("float", '[{"k": 1}, {"k": 2}, {"k": 3}, {"k": 4}]'),
+        # with quotes
+        ("float", '[{"k": "1"}, {"k": "2"}]'),
+        # with quotes, mixed
+        ("float", '[{"k": "1"}, {"k": "2"}, {"k": 3}, {"k": 4}]'),
+        # with quotes, null, mixed
+        ("float", '[{"k": "1"}, {"k": "2"}, {"k": null}, {"k": 4}]'),
+        # with quotes, NAN
+        ("float", '[{"k": "1"}, {"k": "2"}, {"k": NaN}, {"k": "4"}]'),
+        # without quotes
+        ("str", '[{"k": 1}, {"k": 2}, {"k": 3}, {"k": 4}]'),
+        # with quotes
+        ("str", '[{"k": "1"}, {"k": "2"}]'),
+        # with quotes, mixed
+        ("str", '[{"k": "1"}, {"k": "2"}, {"k": 3}, {"k": 4}]'),
+        # with quotes, null, mixed
+        ("str", '[{"k": "1"}, {"k": "2"}, {"k": null}, {"k": 4}]'),
+        # without quotes, null
+        ("str", '[{"k": 1}, {"k": 2}, {"k": null}, {"k": 4}]'),
+    ],
+)
+def test_json_quoted_values_with_schema(col_type, json_str):
+    experimental_df = cudf.read_json(
+        StringIO(json_str),
+        engine="cudf_experimental",
+        orient="records",
+        dtype={"k": col_type},
+    )
+    cudf_df = cudf.read_json(
+        StringIO(json_str.replace(",", "\n")[1:-1]),
+        engine="cudf",
+        orient="records",
+        lines=True,
+        dtype={"k": col_type},
+    )
+    assert_eq(cudf_df, experimental_df)
+
+
+@pytest.mark.parametrize(
+    "col_type,json_str,expected",
+    [
+        # with quotes, mixed
+        ("int", '[{"k": "1"}, {"k": "2"}, {"k": 3}, {"k": 4}]', [1, 2, 3, 4]),
+        # with quotes, null, mixed
+        (
+            "int",
+            '[{"k": "1"}, {"k": "2"}, {"k": null}, {"k": 4}]',
+            [1, 2, None, 4],
+        ),
+        # with quotes, mixed
+        (
+            "str",
+            '[{"k": "1"}, {"k": "2"}, {"k": 3}, {"k": 4}]',
+            ["1", "2", "3", "4"],
+        ),
+        # with quotes, null, mixed
+        (
+            "str",
+            '[{"k": "1"}, {"k": "2"}, {"k": null}, {"k": 4}]',
+            ["1", "2", None, "4"],
+        ),
+    ],
+)
+def test_json_quoted_values(col_type, json_str, expected):
+    experimental_df = cudf.read_json(
+        StringIO(json_str),
+        engine="cudf_experimental",
+        orient="records",
+        dtype={"k": col_type},
+    )
+    cudf_df = cudf.read_json(
+        StringIO(json_str.replace(",", "\n")[1:-1]),
+        engine="cudf",
+        orient="records",
+        lines=True,
+        dtype={"k": col_type},
+    )
+    assert_eq(expected, experimental_df.k.to_arrow().to_pylist())
+    assert_eq(expected, cudf_df.k.to_arrow().to_pylist())
+
+
 @pytest.mark.parametrize(
     "keep_quotes,result",
     [

From baa645dec67725036a724be02bb9d69de1d4136f Mon Sep 17 00:00:00 2001
From: Yuan Jiang <47068112+cindyyuanjiang@users.noreply.github.com>
Date: Thu, 3 Nov 2022 07:10:58 -0700
Subject: [PATCH 119/202] Add strings `like` jni and native method (#12032)

[rapidsai/cudf#11558](https://github.com/rapidsai/cudf/pull/11558) added strings `like` function to cudf, which is a wildcard-based string matching function based on SQL's LIKE statement.

We add `like` jni and native method calling the `like` function in #11558 and corresponding Java unit tests. This is part of the solution for issue [NVIDIA/spark-rapids#6430](https://github.com/NVIDIA/spark-rapids/issues/6430).

Authors:
  - Yuan Jiang (https://github.com/cindyyuanjiang)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Gera Shegalov (https://github.com/gerashegalov)
  - Jason Lowe (https://github.com/jlowe)

URL: https://github.com/rapidsai/cudf/pull/12032
---
 .../main/java/ai/rapids/cudf/ColumnView.java  | 50 ++++++++++++++++
 java/src/main/native/src/ColumnViewJni.cpp    | 18 ++++++
 .../java/ai/rapids/cudf/ColumnVectorTest.java | 57 +++++++++++++++++++
 3 files changed, 125 insertions(+)

diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java
index 8bc764a078e..e639320b028 100644
--- a/java/src/main/java/ai/rapids/cudf/ColumnView.java
+++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java
@@ -3276,6 +3276,46 @@ public final ColumnVector extractAllRecord(String pattern, int idx) {
     return new ColumnVector(extractAllRecord(this.getNativeView(), pattern, idx));
   }
 
+  /**
+   * Returns a boolean ColumnVector identifying rows which
+   * match the given like pattern.
+   *
+   * The like pattern expects only 2 wildcard special characters
+   * - `%` any number of any character (including no characters)
+   * - `_` any single character
+   *
+   * ```
+   * cv = ["azaa", "ababaabba", "aaxa"]
+   * r = cv.like("%a_aa%", "\\")
+   * r is now [true, true, false]
+   * r = cv.like("a__a", "\\")
+   * r is now [true, false, true]
+   * ```
+   *
+   * The escape character, expected to be either 0 or 1 character long, is used to
+   * include a literal `%` or `_` in the search.
+   * If more than one character is specified, only the first character is used.
+   *
+   * ```
+   * cv = ["abc_def", "abc1def", "abc_"]
+   * r = cv.like("abc/_d%", "/")
+   * r is now [true, false, false]
+   * ```
+   * Any null string entries return corresponding null output column entries.
+   *
+   * @param pattern Like pattern to match to each string.
+   * @param escapeChar String scalar specifying the escape prefix character (e.g. "\\"); must not be null.
+   * @return New ColumnVector of boolean results for each string.
+   */
+  public final ColumnVector like(Scalar pattern, Scalar escapeChar) {
+    assert type.equals(DType.STRING) : "column type must be a String";
+    assert pattern != null : "pattern scalar must not be null";
+    assert pattern.getType().equals(DType.STRING) : "pattern scalar must be a string scalar";
+    assert escapeChar != null : "escapeChar scalar must not be null";
+    assert escapeChar.getType().equals(DType.STRING) : "escapeChar scalar must be a string scalar";
+    return new ColumnVector(like(getNativeView(), pattern.getScalarHandle(), escapeChar.getScalarHandle()));
+  }
+
 
   /**
    * Converts all character sequences starting with '%' into character code-points
@@ -4034,6 +4074,16 @@ private static native long stringReplaceWithBackrefs(long columnView, String pat
    */
   private static native long containsRe(long cudfViewHandle, String pattern) throws CudfException;
 
+  /**
+   * Native method for checking if strings match the passed in like pattern
+   * and escape character.
+   * @param cudfViewHandle native handle of the cudf::column_view being operated on.
+   * @param patternHandle handle of scalar containing the string like pattern.
+   * @param escapeCharHandle handle of scalar containing the string escape character.
+   * @return native handle of the resulting cudf column containing the boolean results.
+   */
+  private static native long like(long cudfViewHandle, long patternHandle, long escapeCharHandle) throws CudfException;
+
   /**
    * Native method for checking if strings in a column contains a specified comparison string.
    * @param cudfViewHandle native handle of the cudf::column_view being operated on.
diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp
index e2a96de93ef..f52d3201a10 100644
--- a/java/src/main/native/src/ColumnViewJni.cpp
+++ b/java/src/main/native/src/ColumnViewJni.cpp
@@ -1298,6 +1298,24 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_containsRe(JNIEnv *env, j
   CATCH_STD(env, 0);
 }
 
+JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_like(JNIEnv *env, jobject j_object,
+                                                            jlong j_view_handle, jlong pattern,
+                                                            jlong escapeChar) {
+  JNI_NULL_CHECK(env, j_view_handle, "column is null", false);
+  JNI_NULL_CHECK(env, pattern, "pattern is null", false);
+  JNI_NULL_CHECK(env, escapeChar, "escape character is null", false);
+
+  try {
+    cudf::jni::auto_set_device(env);
+    auto const column_view = reinterpret_cast<cudf::column_view const *>(j_view_handle);
+    auto const strings_column = cudf::strings_column_view{*column_view};
+    auto const pattern_scalar = reinterpret_cast<cudf::string_scalar const *>(pattern);
+    auto const escape_scalar = reinterpret_cast<cudf::string_scalar const *>(escapeChar);
+    return release_as_jlong(cudf::strings::like(strings_column, *pattern_scalar, *escape_scalar));
+  }
+  CATCH_STD(env, 0);
+}
+
 JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_binaryOpVV(JNIEnv *env, jclass,
                                                                   jlong lhs_view, jlong rhs_view,
                                                                   jint int_op, jint out_dtype,
diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
index f5c32b0da20..e1ed5e12fc2 100644
--- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
+++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java
@@ -4193,6 +4193,63 @@ void testContainsReEmptyInput() {
     }
   }
 
+  @Test
+  void testLike() {
+    // Default escape character
+    try (ColumnVector testStrings = ColumnVector.fromStrings(
+           "a", "aa", "aaa", "aba", "b", "bb", "bba", "", "áéêú", "a1b2c3");
+         Scalar patternString1 = Scalar.fromString("a1b2c3");
+         Scalar patternString2 = Scalar.fromString("__a%");
+         Scalar defaultEscape = Scalar.fromString("\\");
+         ColumnVector res1 = testStrings.like(patternString1, defaultEscape);
+         ColumnVector res2 = testStrings.like(patternString2, defaultEscape);
+         ColumnVector expected1 = ColumnVector.fromBoxedBooleans(
+           false, false, false, false, false, false, false, false, false, true);
+         ColumnVector expected2 = ColumnVector.fromBoxedBooleans(
+           false, false, true, true, false, false, true, false, false, false)) {
+      assertColumnsAreEqual(expected1, res1);
+      assertColumnsAreEqual(expected2, res2);
+    }
+    // Non-default escape character
+    try (ColumnVector testStrings = ColumnVector.fromStrings(
+           "10%-20%", "10-20", "10%%-20%", "a_b", "b_a", "___", "", "aéb", "_%_", "_%a");
+         Scalar patternString1 = Scalar.fromString("10%%%%-20%%");
+         Scalar patternString2 = Scalar.fromString("___%%");
+         Scalar escapeChar1 = Scalar.fromString("%");
+         Scalar escapeChar2 = Scalar.fromString("_");
+         ColumnVector res1 = testStrings.like(patternString1, escapeChar1);
+         ColumnVector res2 = testStrings.like(patternString2, escapeChar2);
+         ColumnVector expected1 = ColumnVector.fromBoxedBooleans(
+           false, false, true, false, false, false, false, false, false, false);
+         ColumnVector expected2 = ColumnVector.fromBoxedBooleans(
+           false, false, false, false, false, false, false, false, true, true)) {
+      assertColumnsAreEqual(expected1, res1);
+      assertColumnsAreEqual(expected2, res2);
+    }
+    assertThrows(AssertionError.class, () -> {
+      try (ColumnVector testStrings = ColumnVector.fromStrings("a", "B", "cd", null, "");
+           Scalar defaultEscape = Scalar.fromString("\\");
+           ColumnVector res = testStrings.like(null, defaultEscape)) {}
+    });
+    assertThrows(AssertionError.class, () -> {
+      try (ColumnVector testStrings = ColumnVector.fromStrings("a", "B", "cd", null, "");
+           Scalar patternString = Scalar.fromString("");
+           ColumnVector res = testStrings.like(patternString, null)) {}
+    });
+    assertThrows(AssertionError.class, () -> {
+      try (ColumnVector testStrings = ColumnVector.fromStrings("a", "B", "cd", null, "");
+           Scalar patternString = Scalar.fromString("");
+           Scalar intScalar = Scalar.fromInt(1);
+           ColumnVector res = testStrings.like(patternString, intScalar)) {}
+    });
+    assertThrows(AssertionError.class, () -> {
+      try (ColumnVector testStrings = ColumnVector.fromStrings("a", "B", "cd", null, "");
+           Scalar intScalar = Scalar.fromInt(1);
+           Scalar defaultEscape = Scalar.fromString("\\");
+           ColumnVector res = testStrings.like(intScalar, defaultEscape)) {}
+    });
+  }
+
   @Test
   void testUrlDecode() {
     String[] inputs = new String[] {

From b156c25d300a96120b0a63d7fb28fce9a0771b35 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Thu, 3 Nov 2022 09:58:38 -0500
Subject: [PATCH 120/202] Add `memory_usage` & `items` implementation for
 `Struct` column & dtype (#12033)

Fixes: #11893

- [x] This PR implements `StructColumn.memory_usage` and `StructDtype.itemsize`

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Lawrence Mitchell (https://github.com/wence-)

URL: https://github.com/rapidsai/cudf/pull/12033
---
 python/cudf/cudf/core/column/struct.py | 13 +++++++++++++
 python/cudf/cudf/core/dtypes.py        |  8 ++++++++
 python/cudf/cudf/tests/test_list.py    |  2 ++
 python/cudf/cudf/tests/test_struct.py  | 21 +++++++++++++++++++++
 4 files changed, 44 insertions(+)

diff --git a/python/cudf/cudf/core/column/struct.py b/python/cudf/cudf/core/column/struct.py
index 67ff3e48dbd..69d70cf427f 100644
--- a/python/cudf/cudf/core/column/struct.py
+++ b/python/cudf/cudf/core/column/struct.py
@@ -1,6 +1,8 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 from __future__ import annotations
 
+from functools import cached_property
+
 import pandas as pd
 import pyarrow as pa
 
@@ -65,6 +67,17 @@ def to_pandas(self, index: pd.Index = None, **kwargs) -> "pd.Series":
             pd_series.index = index
         return pd_series
 
+    @cached_property
+    def memory_usage(self):
+        n = 0
+        if self.nullable:
+            n += cudf._lib.null_mask.bitmask_allocation_size_bytes(self.size)
+
+        for child in self.children:
+            n += child.memory_usage
+
+        return n
+
     def element_indexing(self, index: int):
         result = super().element_indexing(index)
         return {
diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py
index 25b1b3895de..39c7b8e6b57 100644
--- a/python/cudf/cudf/core/dtypes.py
+++ b/python/cudf/cudf/core/dtypes.py
@@ -4,6 +4,7 @@
 import operator
 import pickle
 import textwrap
+from functools import cached_property
 from typing import Any, Callable, Dict, List, Tuple, Type, Union
 
 import numpy as np
@@ -627,6 +628,13 @@ def deserialize(cls, header: dict, frames: list):
                 fields[k] = pickle.loads(dtype)
         return cls(fields)
 
+    @cached_property
+    def itemsize(self):
+        return sum(
+            cudf.utils.dtypes.cudf_dtype_from_pa_type(field.type).itemsize
+            for field in self._typ
+        )
+
 
 decimal_dtype_template = textwrap.dedent(
     """
diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py
index 8ea11382419..4c2a14fc45c 100644
--- a/python/cudf/cudf/tests/test_list.py
+++ b/python/cudf/cudf/tests/test_list.py
@@ -864,6 +864,8 @@ def test_memory_usage():
     assert s1.memory_usage() == 44
     s2 = cudf.Series([[[[1, 2]]], [[[3, 4]]]])
     assert s2.memory_usage() == 68
+    s3 = cudf.Series([[{"b": 1, "a": 10}, {"b": 2, "a": 100}]])
+    assert s3.memory_usage() == 40
 
 
 @pytest.mark.parametrize(
diff --git a/python/cudf/cudf/tests/test_struct.py b/python/cudf/cudf/tests/test_struct.py
index 4c70d20c488..eaee1efcbc8 100644
--- a/python/cudf/cudf/tests/test_struct.py
+++ b/python/cudf/cudf/tests/test_struct.py
@@ -371,3 +371,24 @@ def test_nested_struct_extract_host_scalars(data, idx, expected):
     series = cudf.Series(data)
 
     assert _nested_na_replace(series[idx]) == _nested_na_replace(expected)
+
+
+def test_struct_memory_usage():
+    s = cudf.Series([{"a": 1, "b": 10}, {"a": 2, "b": 20}, {"a": 3, "b": 30}])
+    df = s.struct.explode()
+
+    assert_eq(s.memory_usage(), df.memory_usage().sum())
+
+
+def test_struct_with_null_memory_usage():
+    df = cudf.DataFrame(
+        {
+            "a": cudf.Series([1, 2, -1, -1, 3], dtype="int64"),
+            "b": cudf.Series([10, 20, -1, -1, 30], dtype="int64"),
+        }
+    )
+    s = df.to_struct()
+    assert s.memory_usage() == 80
+
+    s[2:4] = None
+    assert s.memory_usage() == 272

From 2a58ff64bc2869d0a3527b95a8de334eb5bc800e Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 4 Nov 2022 06:01:13 -0700
Subject: [PATCH 121/202] Force using old fmt in nvbench. (#12067)

This is a port of #12064 to 22.12 to unblock CI because forward mergers are currently disabled.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Robert Maynard (https://github.com/robertmaynard)

URL: https://github.com/rapidsai/cudf/pull/12067
---
 cpp/CMakeLists.txt | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index d63c7e75616..03cf4c7d2b7 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -753,7 +753,10 @@ if(CUDF_BUILD_BENCHMARKS)
   include(${rapids-cmake-dir}/cpm/gbench.cmake)
   rapids_cpm_gbench()
 
-  # Find or install NVBench
+  # Find or install NVBench. Temporarily force downloading of fmt because current versions of
+  # nvbench do not support the latest version of fmt, which is automatically pulled into our conda
+  # environments by mamba.
+  set(CPM_DOWNLOAD_fmt TRUE)
   include(${rapids-cmake-dir}/cpm/nvbench.cmake)
   rapids_cpm_nvbench()
   add_subdirectory(benchmarks)

From 1d6931af817b39e2630d0c094f51508a52424f18 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Fri, 4 Nov 2022 11:16:47 -0500
Subject: [PATCH 122/202] Allow falling back to `shim_60.ptx` by default in
 `strings_udf` (#12056)

In the context of distributed, `strings_udf` needs to import and set itself up without creating a CUDA context, as this can interfere with the way the network is being set up. In this situation it can't use its normal mechanism (which requires a context) to query the compute capability of the device, and it falls back on an environment variable `STRINGS_UDF_CC` that needs to be passed from dask instead. A user can set this and their code will work with no problem, but we also need some default configuration that just works when someone builds their code. Without knowing their setup beforehand this can be problematic; as such, I originally added the default value of `cc=52` when the environment variable isn't set. This was, however, not exactly correct for a few reasons:

- It should be 60, I think, since Pascal is the oldest arch supported by RAPIDS.
- We don't always build shim_60.ptx, especially in local mode.

This PR fixes this problem.

Authors:
  - https://github.com/brandon-b-miller

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/12056
---
 python/strings_udf/cpp/CMakeLists.txt      | 4 ++++
 python/strings_udf/strings_udf/__init__.py | 3 ++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/python/strings_udf/cpp/CMakeLists.txt b/python/strings_udf/cpp/CMakeLists.txt
index e5b4aca7076..3e58d10d6e2 100644
--- a/python/strings_udf/cpp/CMakeLists.txt
+++ b/python/strings_udf/cpp/CMakeLists.txt
@@ -92,6 +92,10 @@ endfunction()
 # Create the shim library for each architecture.
 set(SHIM_CUDA_FLAGS --expt-relaxed-constexpr -rdc=true)
 
+# always build a default PTX file in case RAPIDS_NO_INITIALIZE is set and the device cc can't be
+# safely queried through a context
+list(INSERT CMAKE_CUDA_ARCHITECTURES 0 "60")
+
 list(TRANSFORM CMAKE_CUDA_ARCHITECTURES REPLACE "-real" "")
 list(TRANSFORM CMAKE_CUDA_ARCHITECTURES REPLACE "-virtual" "")
 list(SORT CMAKE_CUDA_ARCHITECTURES)
diff --git a/python/strings_udf/strings_udf/__init__.py b/python/strings_udf/strings_udf/__init__.py
index 24f1a2d3bda..2222fb72009 100644
--- a/python/strings_udf/strings_udf/__init__.py
+++ b/python/strings_udf/strings_udf/__init__.py
@@ -43,7 +43,8 @@ def maybe_patch_numba_linker(driver_version):
 
 def _get_ptx_file():
     if "RAPIDS_NO_INITIALIZE" in os.environ:
-        cc = int(os.environ.get("STRINGS_UDF_CC", "52"))
+        # shim_60.ptx is always built
+        cc = int(os.environ.get("STRINGS_UDF_CC", "60"))
     else:
         dev = cuda.get_current_device()
 

From 0278485129f62135d0b2dbd0aad44c0b9fb7537e Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Fri, 4 Nov 2022 13:17:35 -0400
Subject: [PATCH 123/202] Remove default parameters for cudf::strings::detail
 functions (#12003)

Removes default parameters from the `cudf::strings::detail` functions. Most of these were unintentional; the rest were for allowing for the default memory-resource, which was easily fixed. Most of the detail functions are not used outside of strings, and the default parameters were not actually necessary there.

Hopefully this will help with #11967

Authors:
  - David Wendt (https://github.com/davidwendt)
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Yunsong Wang (https://github.com/PointKernel)

URL: https://github.com/rapidsai/cudf/pull/12003
---
 cpp/benchmarks/string/json.cu                 |   4 +-
 cpp/include/cudf/strings/detail/combine.hpp   |  25 ++--
 .../cudf/strings/detail/concatenate.hpp       |   7 +-
 .../cudf/strings/detail/copy_if_else.cuh      |  13 +-
 .../cudf/strings/detail/copy_range.cuh        |  15 ++-
 cpp/include/cudf/strings/detail/copying.hpp   |  12 +-
 cpp/include/cudf/strings/detail/fill.hpp      |  13 +-
 cpp/include/cudf/strings/detail/gather.cuh    |  24 ++--
 cpp/include/cudf/strings/detail/json.hpp      |  13 +-
 cpp/include/cudf/strings/detail/replace.hpp   |  49 ++++----
 cpp/include/cudf/strings/detail/scatter.cuh   |  16 +--
 cpp/include/cudf/strings/detail/utilities.cuh |  29 ++---
 cpp/include/cudf/strings/detail/utilities.hpp |   9 +-
 cpp/src/io/csv/writer_impl.cu                 |  13 +-
 cpp/src/strings/attributes.cu                 |  21 ++--
 cpp/src/strings/case.cu                       |  21 ++--
 cpp/src/strings/char_types/char_types.cu      |  11 +-
 cpp/src/strings/contains.cu                   |  33 +++---
 cpp/src/strings/convert/convert_floats.cu     |   7 +-
 cpp/src/strings/convert/convert_hex.cu        |   9 +-
 cpp/src/strings/convert/convert_integers.cu   |  16 ++-
 cpp/src/strings/convert/convert_ipv4.cu       |  14 +--
 cpp/src/strings/convert/convert_urls.cu       |  14 +--
 cpp/src/strings/count_matches.hpp             |  11 +-
 cpp/src/strings/extract/extract_all.cu        |  11 +-
 cpp/src/strings/filling/fill.cu               |  15 ++-
 cpp/src/strings/like.cu                       |  11 +-
 cpp/src/strings/padding.cu                    |  22 ++--
 cpp/src/strings/replace/multi_re.cu           |  13 +-
 cpp/src/strings/replace/replace_re.cu         |  15 ++-
 cpp/src/strings/search/find.cu                |  80 ++++++-------
 cpp/src/strings/search/find_multiple.cu       |   9 +-
 cpp/src/strings/search/findall.cu             |  11 +-
 cpp/src/strings/split/partition.cu            |  18 ++-
 cpp/src/strings/split/split.cu                |  22 ++--
 cpp/src/strings/split/split_re.cu             |   3 +-
 cpp/src/strings/split/split_record.cu         |  11 +-
 cpp/src/strings/strings_column_factories.cu   |  13 +-
 cpp/src/strings/strip.cu                      |  11 +-
 cpp/src/strings/substring.cu                  |  24 ++--
 cpp/src/strings/translate.cu                  |   9 +-
 cpp/src/strings/wrap.cu                       |   9 +-
 cpp/tests/strings/array_tests.cpp             |  40 +++----
 cpp/tests/strings/concatenate_tests.cpp       |   8 +-
 cpp/tests/strings/fill_tests.cpp              |  60 +++-------
 cpp/tests/strings/replace_tests.cpp           | 111 ++++++++++--------
 .../cpp/src/strings/udf/udf_apis.cu           |   2 +-
 47 files changed, 426 insertions(+), 501 deletions(-)

diff --git a/cpp/benchmarks/string/json.cu b/cpp/benchmarks/string/json.cu
index 87528608cc7..d7c0066eb33 100644
--- a/cpp/benchmarks/string/json.cu
+++ b/cpp/benchmarks/string/json.cu
@@ -177,8 +177,8 @@ auto build_json_string_column(int desired_bytes, int num_rows)
   auto d_store_order = cudf::column_device_view::create(float_2bool_columns->get_column(2));
   json_benchmark_row_builder jb{
     desired_bytes, num_rows, {*d_books, *d_bicycles}, *d_book_pct, *d_misc_order, *d_store_order};
-  auto children =
-    cudf::strings::detail::make_strings_children(jb, num_rows, cudf::get_default_stream());
+  auto children = cudf::strings::detail::make_strings_children(
+    jb, num_rows, cudf::get_default_stream(), rmm::mr::get_current_device_resource());
   return cudf::make_strings_column(
     num_rows, std::move(children.first), std::move(children.second), 0, {});
 }
diff --git a/cpp/include/cudf/strings/detail/combine.hpp b/cpp/include/cudf/strings/detail/combine.hpp
index ade28faf645..3b8ed0f4e0d 100644
--- a/cpp/include/cudf/strings/detail/combine.hpp
+++ b/cpp/include/cudf/strings/detail/combine.hpp
@@ -34,14 +34,12 @@ namespace detail {
  *
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
-std::unique_ptr<column> concatenate(
-  table_view const& strings_columns,
-  string_scalar const& separator,
-  string_scalar const& narep,
-  separator_on_nulls separate_nulls = separator_on_nulls::YES,
-  // Move before separate_nulls?
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> concatenate(table_view const& strings_columns,
+                                    string_scalar const& separator,
+                                    string_scalar const& narep,
+                                    separator_on_nulls separate_nulls,
+                                    rmm::cuda_stream_view stream,
+                                    rmm::mr::device_memory_resource* mr);
 
 /**
  * @copydoc join_strings(table_view const&,string_scalar const&,string_scalar
@@ -49,12 +47,11 @@ std::unique_ptr<column> concatenate(
  *
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
-std::unique_ptr<column> join_strings(
-  strings_column_view const& strings,
-  string_scalar const& separator,
-  string_scalar const& narep,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> join_strings(strings_column_view const& strings,
+                                     string_scalar const& separator,
+                                     string_scalar const& narep,
+                                     rmm::cuda_stream_view stream,
+                                     rmm::mr::device_memory_resource* mr);
 
 /**
  * @copydoc join_list_elements(table_view const&,string_scalar const&,string_scalar
diff --git a/cpp/include/cudf/strings/detail/concatenate.hpp b/cpp/include/cudf/strings/detail/concatenate.hpp
index caaeb2afbe7..511e240886a 100644
--- a/cpp/include/cudf/strings/detail/concatenate.hpp
+++ b/cpp/include/cudf/strings/detail/concatenate.hpp
@@ -42,10 +42,9 @@ namespace detail {
  * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New column with concatenated results.
  */
-std::unique_ptr<column> concatenate(
-  host_span<column_view const> columns,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> concatenate(host_span<column_view const> columns,
+                                    rmm::cuda_stream_view stream,
+                                    rmm::mr::device_memory_resource* mr);
 
 }  // namespace detail
 }  // namespace strings
diff --git a/cpp/include/cudf/strings/detail/copy_if_else.cuh b/cpp/include/cudf/strings/detail/copy_if_else.cuh
index 79cec779e02..374c3b2cf68 100644
--- a/cpp/include/cudf/strings/detail/copy_if_else.cuh
+++ b/cpp/include/cudf/strings/detail/copy_if_else.cuh
@@ -56,13 +56,12 @@ namespace detail {
  * @return New strings column.
  */
 template <typename StringIterLeft, typename StringIterRight, typename Filter>
-std::unique_ptr<cudf::column> copy_if_else(
-  StringIterLeft lhs_begin,
-  StringIterLeft lhs_end,
-  StringIterRight rhs_begin,
-  Filter filter_fn,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<cudf::column> copy_if_else(StringIterLeft lhs_begin,
+                                           StringIterLeft lhs_end,
+                                           StringIterRight rhs_begin,
+                                           Filter filter_fn,
+                                           rmm::cuda_stream_view stream,
+                                           rmm::mr::device_memory_resource* mr)
 {
   auto strings_count = std::distance(lhs_begin, lhs_end);
   if (strings_count == 0) return make_empty_column(type_id::STRING);
diff --git a/cpp/include/cudf/strings/detail/copy_range.cuh b/cpp/include/cudf/strings/detail/copy_range.cuh
index e83f6dc0005..ee09ce9a7a9 100644
--- a/cpp/include/cudf/strings/detail/copy_range.cuh
+++ b/cpp/include/cudf/strings/detail/copy_range.cuh
@@ -99,14 +99,13 @@ namespace detail {
  * @return std::unique_ptr<column> The result target column
  */
 template <typename SourceValueIterator, typename SourceValidityIterator>
-std::unique_ptr<column> copy_range(
-  SourceValueIterator source_value_begin,
-  SourceValidityIterator source_validity_begin,
-  strings_column_view const& target,
-  size_type target_begin,
-  size_type target_end,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> copy_range(SourceValueIterator source_value_begin,
+                                   SourceValidityIterator source_validity_begin,
+                                   strings_column_view const& target,
+                                   size_type target_begin,
+                                   size_type target_end,
+                                   rmm::cuda_stream_view stream,
+                                   rmm::mr::device_memory_resource* mr)
 {
   CUDF_EXPECTS(
     (target_begin >= 0) && (target_begin < target.size()) && (target_end <= target.size()),
diff --git a/cpp/include/cudf/strings/detail/copying.hpp b/cpp/include/cudf/strings/detail/copying.hpp
index c70952b0962..7e82ad4c679 100644
--- a/cpp/include/cudf/strings/detail/copying.hpp
+++ b/cpp/include/cudf/strings/detail/copying.hpp
@@ -49,13 +49,11 @@ namespace detail {
  * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New strings column of size (end-start)/step.
  */
-std::unique_ptr<cudf::column> copy_slice(
-  strings_column_view const& strings,
-  size_type start,
-  size_type end = -1,
-  // Move before end?
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<cudf::column> copy_slice(strings_column_view const& strings,
+                                         size_type start,
+                                         size_type end,
+                                         rmm::cuda_stream_view stream,
+                                         rmm::mr::device_memory_resource* mr);
 
 /**
  * @brief Returns a new strings column created by shifting the rows by a specified offset.
diff --git a/cpp/include/cudf/strings/detail/fill.hpp b/cpp/include/cudf/strings/detail/fill.hpp
index 1ad9663a614..43e3f6198f3 100644
--- a/cpp/include/cudf/strings/detail/fill.hpp
+++ b/cpp/include/cudf/strings/detail/fill.hpp
@@ -42,13 +42,12 @@ namespace detail {
  * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New strings column.
  */
-std::unique_ptr<column> fill(
-  strings_column_view const& strings,
-  size_type begin,
-  size_type end,
-  string_scalar const& value,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> fill(strings_column_view const& strings,
+                             size_type begin,
+                             size_type end,
+                             string_scalar const& value,
+                             rmm::cuda_stream_view stream,
+                             rmm::mr::device_memory_resource* mr);
 
 }  // namespace detail
 }  // namespace strings
diff --git a/cpp/include/cudf/strings/detail/gather.cuh b/cpp/include/cudf/strings/detail/gather.cuh
index dfc8f0dacc5..4820e6e77c7 100644
--- a/cpp/include/cudf/strings/detail/gather.cuh
+++ b/cpp/include/cudf/strings/detail/gather.cuh
@@ -288,12 +288,11 @@ std::unique_ptr<cudf::column> gather_chars(StringIterator strings_begin,
  * @return New strings column containing the gathered strings.
  */
 template <bool NullifyOutOfBounds, typename MapIterator>
-std::unique_ptr<cudf::column> gather(
-  strings_column_view const& strings,
-  MapIterator begin,
-  MapIterator end,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<cudf::column> gather(strings_column_view const& strings,
+                                     MapIterator begin,
+                                     MapIterator end,
+                                     rmm::cuda_stream_view stream,
+                                     rmm::mr::device_memory_resource* mr)
 {
   auto const output_count  = std::distance(begin, end);
   auto const strings_count = strings.size();
@@ -372,13 +371,12 @@ std::unique_ptr<cudf::column> gather(
  * @return New strings column containing the gathered strings.
  */
 template <typename MapIterator>
-std::unique_ptr<cudf::column> gather(
-  strings_column_view const& strings,
-  MapIterator begin,
-  MapIterator end,
-  bool nullify_out_of_bounds,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<cudf::column> gather(strings_column_view const& strings,
+                                     MapIterator begin,
+                                     MapIterator end,
+                                     bool nullify_out_of_bounds,
+                                     rmm::cuda_stream_view stream,
+                                     rmm::mr::device_memory_resource* mr)
 {
   if (nullify_out_of_bounds) return gather<true>(strings, begin, end, stream, mr);
   return gather<false>(strings, begin, end, stream, mr);
diff --git a/cpp/include/cudf/strings/detail/json.hpp b/cpp/include/cudf/strings/detail/json.hpp
index 8ea579ae5c0..0fb06d36570 100644
--- a/cpp/include/cudf/strings/detail/json.hpp
+++ b/cpp/include/cudf/strings/detail/json.hpp
@@ -16,6 +16,8 @@
 
 #pragma once
 
+#include <cudf/scalar/scalar.hpp>
+#include <cudf/strings/json.hpp>
 #include <cudf/strings/strings_column_view.hpp>
 #include <cudf/utilities/default_stream.hpp>
 
@@ -30,12 +32,11 @@ namespace detail {
  *
  * @param stream CUDA stream used for device memory operations and kernel launches
  */
-std::unique_ptr<cudf::column> get_json_object(
-  cudf::strings_column_view const& col,
-  cudf::string_scalar const& json_path,
-  get_json_object_options options,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<cudf::column> get_json_object(cudf::strings_column_view const& col,
+                                              cudf::string_scalar const& json_path,
+                                              cudf::strings::get_json_object_options options,
+                                              rmm::cuda_stream_view stream,
+                                              rmm::mr::device_memory_resource* mr);
 
 }  // namespace detail
 }  // namespace strings
diff --git a/cpp/include/cudf/strings/detail/replace.hpp b/cpp/include/cudf/strings/detail/replace.hpp
index a9a6ef00103..aa6fb2feb3d 100644
--- a/cpp/include/cudf/strings/detail/replace.hpp
+++ b/cpp/include/cudf/strings/detail/replace.hpp
@@ -43,14 +43,12 @@ enum class replace_algorithm {
  * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  */
 template <replace_algorithm alg = replace_algorithm::AUTO>
-std::unique_ptr<column> replace(
-  strings_column_view const& strings,
-  string_scalar const& target,
-  string_scalar const& repl,
-  int32_t maxrepl = -1,
-  // Move before maxrepl?
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> replace(strings_column_view const& strings,
+                                string_scalar const& target,
+                                string_scalar const& repl,
+                                int32_t maxrepl,
+                                rmm::cuda_stream_view stream,
+                                rmm::mr::device_memory_resource* mr);
 
 /**
  * @copydoc cudf::strings::replace_slice(strings_column_view const&, string_scalar const&,
@@ -58,14 +56,12 @@ std::unique_ptr<column> replace(
  *
  * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  */
-std::unique_ptr<column> replace_slice(
-  strings_column_view const& strings,
-  string_scalar const& repl = string_scalar(""),
-  size_type start           = 0,
-  size_type stop            = -1,
-  // Move before repl?
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> replace_slice(strings_column_view const& strings,
+                                      string_scalar const& repl,
+                                      size_type start,
+                                      size_type stop,
+                                      rmm::cuda_stream_view stream,
+                                      rmm::mr::device_memory_resource* mr);
 
 /**
  * @copydoc cudf::strings::replace(strings_column_view const&, strings_column_view const&,
@@ -73,12 +69,11 @@ std::unique_ptr<column> replace_slice(
  *
  * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  */
-std::unique_ptr<column> replace(
-  strings_column_view const& strings,
-  strings_column_view const& targets,
-  strings_column_view const& repls,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> replace(strings_column_view const& strings,
+                                strings_column_view const& targets,
+                                strings_column_view const& repls,
+                                rmm::cuda_stream_view stream,
+                                rmm::mr::device_memory_resource* mr);
 
 /**
  * @brief Replaces any null string entries with the given string.
@@ -98,12 +93,10 @@ std::unique_ptr<column> replace(
  * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return New strings column.
  */
-std::unique_ptr<column> replace_nulls(
-  strings_column_view const& strings,
-  string_scalar const& repl = string_scalar(""),
-  // Move before repl?
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> replace_nulls(strings_column_view const& strings,
+                                      string_scalar const& repl,
+                                      rmm::cuda_stream_view stream,
+                                      rmm::mr::device_memory_resource* mr);
 
 }  // namespace detail
 }  // namespace strings
diff --git a/cpp/include/cudf/strings/detail/scatter.cuh b/cpp/include/cudf/strings/detail/scatter.cuh
index c8a90ea538a..7d6a07b4b10 100644
--- a/cpp/include/cudf/strings/detail/scatter.cuh
+++ b/cpp/include/cudf/strings/detail/scatter.cuh
@@ -57,18 +57,18 @@ namespace detail {
  * @return New strings column.
  */
 template <typename SourceIterator, typename MapIterator>
-std::unique_ptr<column> scatter(
-  SourceIterator begin,
-  SourceIterator end,
-  MapIterator scatter_map,
-  strings_column_view const& target,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> scatter(SourceIterator begin,
+                                SourceIterator end,
+                                MapIterator scatter_map,
+                                strings_column_view const& target,
+                                rmm::cuda_stream_view stream,
+                                rmm::mr::device_memory_resource* mr)
 {
   if (target.is_empty()) return make_empty_column(type_id::STRING);
 
   // create vector of string_view's to scatter into
-  rmm::device_uvector<string_view> target_vector = create_string_vector_from_column(target, stream);
+  rmm::device_uvector<string_view> target_vector =
+    create_string_vector_from_column(target, stream, rmm::mr::get_current_device_resource());
 
   // this ensures empty strings are not mapped to nulls in the make_strings_column function
   auto const size = thrust::distance(begin, end);
diff --git a/cpp/include/cudf/strings/detail/utilities.cuh b/cpp/include/cudf/strings/detail/utilities.cuh
index 9404ac14775..76e5f931981 100644
--- a/cpp/include/cudf/strings/detail/utilities.cuh
+++ b/cpp/include/cudf/strings/detail/utilities.cuh
@@ -50,11 +50,10 @@ namespace detail {
  * @return offsets child column for strings column
  */
 template <typename InputIterator>
-std::unique_ptr<column> make_offsets_child_column(
-  InputIterator begin,
-  InputIterator end,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> make_offsets_child_column(InputIterator begin,
+                                                  InputIterator end,
+                                                  rmm::cuda_stream_view stream,
+                                                  rmm::mr::device_memory_resource* mr)
 {
   CUDF_EXPECTS(begin < end, "Invalid iterator range");
   auto count = thrust::distance(begin, end);
@@ -117,12 +116,11 @@ __device__ inline char* copy_string(char* buffer, const string_view& d_string)
  * @return offsets child column and chars child column for a strings column
  */
 template <typename SizeAndExecuteFunction>
-auto make_strings_children(
-  SizeAndExecuteFunction size_and_exec_fn,
-  size_type exec_size,
-  size_type strings_count,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+auto make_strings_children(SizeAndExecuteFunction size_and_exec_fn,
+                           size_type exec_size,
+                           size_type strings_count,
+                           rmm::cuda_stream_view stream,
+                           rmm::mr::device_memory_resource* mr)
 {
   auto offsets_column = make_numeric_column(
     data_type{type_id::INT32}, strings_count + 1, mask_state::UNALLOCATED, stream, mr);
@@ -175,11 +173,10 @@ auto make_strings_children(
  * @return offsets child column and chars child column for a strings column
  */
 template <typename SizeAndExecuteFunction>
-auto make_strings_children(
-  SizeAndExecuteFunction size_and_exec_fn,
-  size_type strings_count,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+auto make_strings_children(SizeAndExecuteFunction size_and_exec_fn,
+                           size_type strings_count,
+                           rmm::cuda_stream_view stream,
+                           rmm::mr::device_memory_resource* mr)
 {
   return make_strings_children(size_and_exec_fn, strings_count, strings_count, stream, mr);
 }
diff --git a/cpp/include/cudf/strings/detail/utilities.hpp b/cpp/include/cudf/strings/detail/utilities.hpp
index 829e0207110..41a2654dce3 100644
--- a/cpp/include/cudf/strings/detail/utilities.hpp
+++ b/cpp/include/cudf/strings/detail/utilities.hpp
@@ -36,10 +36,9 @@ namespace detail {
  * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return The chars child column for a strings column.
  */
-std::unique_ptr<column> create_chars_child_column(
-  size_type bytes,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> create_chars_child_column(size_type bytes,
+                                                  rmm::cuda_stream_view stream,
+                                                  rmm::mr::device_memory_resource* mr);
 
 /**
  * @brief Creates a string_view vector from a strings column.
@@ -52,7 +51,7 @@ std::unique_ptr<column> create_chars_child_column(
 rmm::device_uvector<string_view> create_string_vector_from_column(
   cudf::strings_column_view const strings,
   rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+  rmm::mr::device_memory_resource* mr);
 
 }  // namespace detail
 }  // namespace strings
diff --git a/cpp/src/io/csv/writer_impl.cu b/cpp/src/io/csv/writer_impl.cu
index 7230b455d4a..ed2f412f291 100644
--- a/cpp/src/io/csv/writer_impl.cu
+++ b/cpp/src/io/csv/writer_impl.cu
@@ -364,8 +364,11 @@ void write_chunked(data_sink* out_sink,
   CUDF_EXPECTS(str_column_view.size() > 0, "Unexpected empty strings column.");
 
   cudf::string_scalar newline{options.get_line_terminator()};
-  auto p_str_col_w_nl =
-    cudf::strings::detail::join_strings(str_column_view, newline, string_scalar("", false), stream);
+  auto p_str_col_w_nl = cudf::strings::detail::join_strings(str_column_view,
+                                                            newline,
+                                                            string_scalar("", false),
+                                                            stream,
+                                                            rmm::mr::get_current_device_resource());
   strings_column_view strings_column{p_str_col_w_nl->view()};
 
   auto total_num_bytes      = strings_column.chars_size();
@@ -470,9 +473,11 @@ void write_csv(data_sink* out_sink,
                                                     delimiter_str,
                                                     options.get_na_rep(),
                                                     strings::separator_on_nulls::YES,
-                                                    stream);
+                                                    stream,
+                                                    rmm::mr::get_current_device_resource());
         cudf::string_scalar narep{options.get_na_rep()};
-        return cudf::strings::detail::replace_nulls(str_table_view.column(0), narep, stream);
+        return cudf::strings::detail::replace_nulls(
+          str_table_view.column(0), narep, stream, rmm::mr::get_current_device_resource());
       }();
 
       write_chunked(out_sink, str_concat_col->view(), options, stream, mr);
diff --git a/cpp/src/strings/attributes.cu b/cpp/src/strings/attributes.cu
index ea01b570b91..127d3aa8fe7 100644
--- a/cpp/src/strings/attributes.cu
+++ b/cpp/src/strings/attributes.cu
@@ -87,19 +87,17 @@ std::unique_ptr<column> counts_fn(strings_column_view const& strings,
 
 }  // namespace
 
-std::unique_ptr<column> count_characters(
-  strings_column_view const& strings,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> count_characters(strings_column_view const& strings,
+                                         rmm::cuda_stream_view stream,
+                                         rmm::mr::device_memory_resource* mr)
 {
   auto ufn = [] __device__(const string_view& d_str) { return d_str.length(); };
   return counts_fn(strings, ufn, stream, mr);
 }
 
-std::unique_ptr<column> count_bytes(
-  strings_column_view const& strings,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> count_bytes(strings_column_view const& strings,
+                                    rmm::cuda_stream_view stream,
+                                    rmm::mr::device_memory_resource* mr)
 {
   auto ufn = [] __device__(const string_view& d_str) { return d_str.size_bytes(); };
   return counts_fn(strings, ufn, stream, mr);
@@ -135,10 +133,9 @@ struct code_points_fn {
 
 namespace detail {
 //
-std::unique_ptr<column> code_points(
-  strings_column_view const& strings,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> code_points(strings_column_view const& strings,
+                                    rmm::cuda_stream_view stream,
+                                    rmm::mr::device_memory_resource* mr)
 {
   auto strings_column = column_device_view::create(strings.parent(), stream);
   auto d_column       = *strings_column;
diff --git a/cpp/src/strings/case.cu b/cpp/src/strings/case.cu
index 05c2904ec9e..a2cee757112 100644
--- a/cpp/src/strings/case.cu
+++ b/cpp/src/strings/case.cu
@@ -147,30 +147,27 @@ std::unique_ptr<column> convert_case(strings_column_view const& strings,
 
 }  // namespace
 
-std::unique_ptr<column> to_lower(
-  strings_column_view const& strings,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> to_lower(strings_column_view const& strings,
+                                 rmm::cuda_stream_view stream,
+                                 rmm::mr::device_memory_resource* mr)
 {
   character_flags_table_type case_flag = IS_UPPER(0xFF);  // convert only upper case characters
   return convert_case(strings, case_flag, stream, mr);
 }
 
 //
-std::unique_ptr<column> to_upper(
-  strings_column_view const& strings,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> to_upper(strings_column_view const& strings,
+                                 rmm::cuda_stream_view stream,
+                                 rmm::mr::device_memory_resource* mr)
 {
   character_flags_table_type case_flag = IS_LOWER(0xFF);  // convert only lower case characters
   return convert_case(strings, case_flag, stream, mr);
 }
 
 //
-std::unique_ptr<column> swapcase(
-  strings_column_view const& strings,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> swapcase(strings_column_view const& strings,
+                                 rmm::cuda_stream_view stream,
+                                 rmm::mr::device_memory_resource* mr)
 {
   // convert only upper or lower case characters
   character_flags_table_type case_flag = IS_LOWER(0xFF) | IS_UPPER(0xFF);
diff --git a/cpp/src/strings/char_types/char_types.cu b/cpp/src/strings/char_types/char_types.cu
index 0426d82c6c6..aa1e4dce4d0 100644
--- a/cpp/src/strings/char_types/char_types.cu
+++ b/cpp/src/strings/char_types/char_types.cu
@@ -38,12 +38,11 @@ namespace cudf {
 namespace strings {
 namespace detail {
 //
-std::unique_ptr<column> all_characters_of_type(
-  strings_column_view const& strings,
-  string_character_types types,
-  string_character_types verify_types,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> all_characters_of_type(strings_column_view const& strings,
+                                               string_character_types types,
+                                               string_character_types verify_types,
+                                               rmm::cuda_stream_view stream,
+                                               rmm::mr::device_memory_resource* mr)
 {
   auto strings_count  = strings.size();
   auto strings_column = column_device_view::create(strings.parent(), stream);
diff --git a/cpp/src/strings/contains.cu b/cpp/src/strings/contains.cu
index c6e71b00809..80941990610 100644
--- a/cpp/src/strings/contains.cu
+++ b/cpp/src/strings/contains.cu
@@ -86,32 +86,29 @@ std::unique_ptr<column> contains_impl(strings_column_view const& input,
 
 }  // namespace
 
-std::unique_ptr<column> contains_re(
-  strings_column_view const& input,
-  std::string_view pattern,
-  regex_flags const flags,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> contains_re(strings_column_view const& input,
+                                    std::string_view pattern,
+                                    regex_flags const flags,
+                                    rmm::cuda_stream_view stream,
+                                    rmm::mr::device_memory_resource* mr)
 {
   return contains_impl(input, pattern, flags, false, stream, mr);
 }
 
-std::unique_ptr<column> matches_re(
-  strings_column_view const& input,
-  std::string_view pattern,
-  regex_flags const flags,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> matches_re(strings_column_view const& input,
+                                   std::string_view pattern,
+                                   regex_flags const flags,
+                                   rmm::cuda_stream_view stream,
+                                   rmm::mr::device_memory_resource* mr)
 {
   return contains_impl(input, pattern, flags, true, stream, mr);
 }
 
-std::unique_ptr<column> count_re(
-  strings_column_view const& input,
-  std::string_view pattern,
-  regex_flags const flags,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> count_re(strings_column_view const& input,
+                                 std::string_view pattern,
+                                 regex_flags const flags,
+                                 rmm::cuda_stream_view stream,
+                                 rmm::mr::device_memory_resource* mr)
 {
   // compile regex into device object
   auto d_prog = reprog_device::create(pattern, flags, capture_groups::NON_CAPTURE, stream);
diff --git a/cpp/src/strings/convert/convert_floats.cu b/cpp/src/strings/convert/convert_floats.cu
index 2de4bd2a2cc..49713731ff5 100644
--- a/cpp/src/strings/convert/convert_floats.cu
+++ b/cpp/src/strings/convert/convert_floats.cu
@@ -454,10 +454,9 @@ std::unique_ptr<column> from_floats(column_view const& floats, rmm::mr::device_m
 }
 
 namespace detail {
-std::unique_ptr<column> is_float(
-  strings_column_view const& strings,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> is_float(strings_column_view const& strings,
+                                 rmm::cuda_stream_view stream,
+                                 rmm::mr::device_memory_resource* mr)
 {
   auto strings_column = column_device_view::create(strings.parent(), stream);
   auto d_column       = *strings_column;
diff --git a/cpp/src/strings/convert/convert_hex.cu b/cpp/src/strings/convert/convert_hex.cu
index dbbdffac2c2..f41232a4af6 100644
--- a/cpp/src/strings/convert/convert_hex.cu
+++ b/cpp/src/strings/convert/convert_hex.cu
@@ -206,11 +206,10 @@ struct dispatch_integers_to_hex_fn {
 }  // namespace
 
 // This will convert a strings column into any integer column type.
-std::unique_ptr<column> hex_to_integers(
-  strings_column_view const& strings,
-  data_type output_type,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> hex_to_integers(strings_column_view const& strings,
+                                        data_type output_type,
+                                        rmm::cuda_stream_view stream,
+                                        rmm::mr::device_memory_resource* mr)
 {
   size_type strings_count = strings.size();
   if (strings_count == 0) return make_empty_column(output_type);
diff --git a/cpp/src/strings/convert/convert_integers.cu b/cpp/src/strings/convert/convert_integers.cu
index 343288af0c1..ed40c47b99d 100644
--- a/cpp/src/strings/convert/convert_integers.cu
+++ b/cpp/src/strings/convert/convert_integers.cu
@@ -157,10 +157,9 @@ struct dispatch_is_integer_fn {
 
 }  // namespace
 
-std::unique_ptr<column> is_integer(
-  strings_column_view const& strings,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> is_integer(strings_column_view const& strings,
+                                   rmm::cuda_stream_view stream,
+                                   rmm::mr::device_memory_resource* mr)
 {
   auto const d_column = column_device_view::create(strings.parent(), stream);
   auto results        = make_numeric_column(data_type{type_id::BOOL8},
@@ -192,11 +191,10 @@ std::unique_ptr<column> is_integer(
   return results;
 }
 
-std::unique_ptr<column> is_integer(
-  strings_column_view const& strings,
-  data_type int_type,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> is_integer(strings_column_view const& strings,
+                                   data_type int_type,
+                                   rmm::cuda_stream_view stream,
+                                   rmm::mr::device_memory_resource* mr)
 {
   if (strings.is_empty()) { return cudf::make_empty_column(type_id::BOOL8); }
   return type_dispatcher(int_type, dispatch_is_integer_fn{}, strings, stream, mr);
diff --git a/cpp/src/strings/convert/convert_ipv4.cu b/cpp/src/strings/convert/convert_ipv4.cu
index 5229f0fdf1b..0dcb2b61446 100644
--- a/cpp/src/strings/convert/convert_ipv4.cu
+++ b/cpp/src/strings/convert/convert_ipv4.cu
@@ -75,10 +75,9 @@ struct ipv4_to_integers_fn {
 }  // namespace
 
 // Convert strings column of IPv4 addresses to integers column
-std::unique_ptr<column> ipv4_to_integers(
-  strings_column_view const& strings,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> ipv4_to_integers(strings_column_view const& strings,
+                                         rmm::cuda_stream_view stream,
+                                         rmm::mr::device_memory_resource* mr)
 {
   size_type strings_count = strings.size();
   if (strings_count == 0) return make_numeric_column(data_type{type_id::INT64}, 0);
@@ -162,10 +161,9 @@ struct integers_to_ipv4_fn {
 }  // namespace
 
 // Convert integers into IPv4 addresses
-std::unique_ptr<column> integers_to_ipv4(
-  column_view const& integers,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> integers_to_ipv4(column_view const& integers,
+                                         rmm::cuda_stream_view stream,
+                                         rmm::mr::device_memory_resource* mr)
 {
   size_type strings_count = integers.size();
   if (strings_count == 0) return make_empty_column(type_id::STRING);
diff --git a/cpp/src/strings/convert/convert_urls.cu b/cpp/src/strings/convert/convert_urls.cu
index 0c6ecf46313..25e37526f59 100644
--- a/cpp/src/strings/convert/convert_urls.cu
+++ b/cpp/src/strings/convert/convert_urls.cu
@@ -129,10 +129,9 @@ struct url_encoder_fn {
 }  // namespace
 
 //
-std::unique_ptr<column> url_encode(
-  strings_column_view const& strings,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> url_encode(strings_column_view const& strings,
+                                   rmm::cuda_stream_view stream,
+                                   rmm::mr::device_memory_resource* mr)
 {
   size_type strings_count = strings.size();
   if (strings_count == 0) return make_empty_column(type_id::STRING);
@@ -388,10 +387,9 @@ __global__ void url_decode_char_replacer(column_device_view const in_strings,
 }  // namespace
 
 //
-std::unique_ptr<column> url_decode(
-  strings_column_view const& strings,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> url_decode(strings_column_view const& strings,
+                                   rmm::cuda_stream_view stream,
+                                   rmm::mr::device_memory_resource* mr)
 {
   size_type strings_count = strings.size();
   if (strings_count == 0) return make_empty_column(type_id::STRING);
diff --git a/cpp/src/strings/count_matches.hpp b/cpp/src/strings/count_matches.hpp
index d4bcdaf4042..a4f76c1c5e3 100644
--- a/cpp/src/strings/count_matches.hpp
+++ b/cpp/src/strings/count_matches.hpp
@@ -41,12 +41,11 @@ class reprog_device;
  * @param mr Device memory resource used to allocate the returned column's device memory.
  * @return Integer column of match counts
  */
-std::unique_ptr<column> count_matches(
-  column_device_view const& d_strings,
-  reprog_device& d_prog,
-  size_type output_size,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+std::unique_ptr<column> count_matches(column_device_view const& d_strings,
+                                      reprog_device& d_prog,
+                                      size_type output_size,
+                                      rmm::cuda_stream_view stream,
+                                      rmm::mr::device_memory_resource* mr);
 
 }  // namespace detail
 }  // namespace strings
diff --git a/cpp/src/strings/extract/extract_all.cu b/cpp/src/strings/extract/extract_all.cu
index 1ba5a8a1470..e669d2178a2 100644
--- a/cpp/src/strings/extract/extract_all.cu
+++ b/cpp/src/strings/extract/extract_all.cu
@@ -95,12 +95,11 @@ struct extract_fn {
  *
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
-std::unique_ptr<column> extract_all_record(
-  strings_column_view const& input,
-  std::string_view pattern,
-  regex_flags const flags,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> extract_all_record(strings_column_view const& input,
+                                           std::string_view pattern,
+                                           regex_flags const flags,
+                                           rmm::cuda_stream_view stream,
+                                           rmm::mr::device_memory_resource* mr)
 {
   auto const strings_count = input.size();
   auto const d_strings     = column_device_view::create(input.parent(), stream);
diff --git a/cpp/src/strings/filling/fill.cu b/cpp/src/strings/filling/fill.cu
index f813ec24ee9..4bd98ee4cdc 100644
--- a/cpp/src/strings/filling/fill.cu
+++ b/cpp/src/strings/filling/fill.cu
@@ -19,7 +19,7 @@
 #include <cudf/detail/valid_if.cuh>
 #include <cudf/null_mask.hpp>
 #include <cudf/scalar/scalar_device_view.cuh>
-#include <cudf/strings/combine.hpp>
+#include <cudf/strings/detail/fill.hpp>
 #include <cudf/strings/detail/utilities.cuh>
 #include <cudf/strings/detail/utilities.hpp>
 #include <cudf/strings/string_view.cuh>
@@ -35,13 +35,12 @@
 namespace cudf {
 namespace strings {
 namespace detail {
-std::unique_ptr<column> fill(
-  strings_column_view const& strings,
-  size_type begin,
-  size_type end,
-  string_scalar const& value,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> fill(strings_column_view const& strings,
+                             size_type begin,
+                             size_type end,
+                             string_scalar const& value,
+                             rmm::cuda_stream_view stream,
+                             rmm::mr::device_memory_resource* mr)
 {
   auto strings_count = strings.size();
   if (strings_count == 0) return make_empty_column(type_id::STRING);
diff --git a/cpp/src/strings/like.cu b/cpp/src/strings/like.cu
index cb6fc844426..4e4df6cb1ad 100644
--- a/cpp/src/strings/like.cu
+++ b/cpp/src/strings/like.cu
@@ -102,12 +102,11 @@ struct like_fn {
 
 }  // namespace
 
-std::unique_ptr<column> like(
-  strings_column_view const& input,
-  string_scalar const& pattern,
-  string_scalar const& escape_character,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> like(strings_column_view const& input,
+                             string_scalar const& pattern,
+                             string_scalar const& escape_character,
+                             rmm::cuda_stream_view stream,
+                             rmm::mr::device_memory_resource* mr)
 {
   auto results = make_numeric_column(data_type{type_id::BOOL8},
                                      input.size(),
diff --git a/cpp/src/strings/padding.cu b/cpp/src/strings/padding.cu
index d84b4afc7cf..e5497849681 100644
--- a/cpp/src/strings/padding.cu
+++ b/cpp/src/strings/padding.cu
@@ -53,13 +53,12 @@ struct compute_pad_output_length_fn {
 
 }  // namespace
 
-std::unique_ptr<column> pad(
-  strings_column_view const& strings,
-  size_type width,
-  side_type side                      = side_type::RIGHT,
-  std::string_view fill_char          = " ",
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> pad(strings_column_view const& strings,
+                            size_type width,
+                            side_type side,
+                            std::string_view fill_char,
+                            rmm::cuda_stream_view stream,
+                            rmm::mr::device_memory_resource* mr)
 {
   size_type strings_count = strings.size();
   if (strings_count == 0) return make_empty_column(type_id::STRING);
@@ -128,11 +127,10 @@ std::unique_ptr<column> pad(
                              std::move(null_mask));
 }
 
-std::unique_ptr<column> zfill(
-  strings_column_view const& input,
-  size_type width,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> zfill(strings_column_view const& input,
+                              size_type width,
+                              rmm::cuda_stream_view stream,
+                              rmm::mr::device_memory_resource* mr)
 {
   if (input.is_empty()) return make_empty_column(type_id::STRING);
 
diff --git a/cpp/src/strings/replace/multi_re.cu b/cpp/src/strings/replace/multi_re.cu
index cc5cf1384ec..f15496ac159 100644
--- a/cpp/src/strings/replace/multi_re.cu
+++ b/cpp/src/strings/replace/multi_re.cu
@@ -125,13 +125,12 @@ struct replace_multi_regex_fn {
 
 }  // namespace
 
-std::unique_ptr<column> replace_re(
-  strings_column_view const& input,
-  std::vector<std::string> const& patterns,
-  strings_column_view const& replacements,
-  regex_flags const flags,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> replace_re(strings_column_view const& input,
+                                   std::vector<std::string> const& patterns,
+                                   strings_column_view const& replacements,
+                                   regex_flags const flags,
+                                   rmm::cuda_stream_view stream,
+                                   rmm::mr::device_memory_resource* mr)
 {
   if (input.is_empty()) { return make_empty_column(type_id::STRING); }
   if (patterns.empty()) {  // if no patterns; just return a copy
diff --git a/cpp/src/strings/replace/replace_re.cu b/cpp/src/strings/replace/replace_re.cu
index 04cb074c016..e9cc60f1d64 100644
--- a/cpp/src/strings/replace/replace_re.cu
+++ b/cpp/src/strings/replace/replace_re.cu
@@ -100,14 +100,13 @@ struct replace_regex_fn {
 }  // namespace
 
 //
-std::unique_ptr<column> replace_re(
-  strings_column_view const& input,
-  std::string_view pattern,
-  string_scalar const& replacement,
-  std::optional<size_type> max_replace_count,
-  regex_flags const flags,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> replace_re(strings_column_view const& input,
+                                   std::string_view pattern,
+                                   string_scalar const& replacement,
+                                   std::optional<size_type> max_replace_count,
+                                   regex_flags const flags,
+                                   rmm::cuda_stream_view stream,
+                                   rmm::mr::device_memory_resource* mr)
 {
   if (input.is_empty()) return make_empty_column(type_id::STRING);
 
diff --git a/cpp/src/strings/search/find.cu b/cpp/src/strings/search/find.cu
index c48aedc5499..e6384d5d6e1 100644
--- a/cpp/src/strings/search/find.cu
+++ b/cpp/src/strings/search/find.cu
@@ -102,13 +102,12 @@ std::unique_ptr<column> find_fn(strings_column_view const& strings,
 
 }  // namespace
 
-std::unique_ptr<column> find(
-  strings_column_view const& strings,
-  string_scalar const& target,
-  size_type start                     = 0,
-  size_type stop                      = -1,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> find(strings_column_view const& strings,
+                             string_scalar const& target,
+                             size_type start,
+                             size_type stop,
+                             rmm::cuda_stream_view stream,
+                             rmm::mr::device_memory_resource* mr)
 {
   auto pfn = [] __device__(
                string_view d_string, string_view d_target, size_type start, size_type stop) {
@@ -122,13 +121,12 @@ std::unique_ptr<column> find(
   return find_fn(strings, target, start, stop, pfn, stream, mr);
 }
 
-std::unique_ptr<column> rfind(
-  strings_column_view const& strings,
-  string_scalar const& target,
-  size_type start                     = 0,
-  size_type stop                      = -1,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> rfind(strings_column_view const& strings,
+                              string_scalar const& target,
+                              size_type start,
+                              size_type stop,
+                              rmm::cuda_stream_view stream,
+                              rmm::mr::device_memory_resource* mr)
 {
   auto pfn = [] __device__(
                string_view d_string, string_view d_target, size_type start, size_type stop) {
@@ -366,11 +364,10 @@ std::unique_ptr<column> contains_fn(strings_column_view const& strings,
 
 }  // namespace
 
-std::unique_ptr<column> contains(
-  strings_column_view const& input,
-  string_scalar const& target,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> contains(strings_column_view const& input,
+                                 string_scalar const& target,
+                                 rmm::cuda_stream_view stream,
+                                 rmm::mr::device_memory_resource* mr)
 {
   // use warp parallel when the average string width is greater than the threshold
   if (!input.is_empty() && ((input.chars_size() / input.size()) > AVG_CHAR_BYTES_THRESHOLD)) {
@@ -384,11 +381,10 @@ std::unique_ptr<column> contains(
   return contains_fn(input, target, pfn, stream, mr);
 }
 
-std::unique_ptr<column> contains(
-  strings_column_view const& strings,
-  strings_column_view const& targets,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> contains(strings_column_view const& strings,
+                                 strings_column_view const& targets,
+                                 rmm::cuda_stream_view stream,
+                                 rmm::mr::device_memory_resource* mr)
 {
   auto pfn = [] __device__(string_view d_string, string_view d_target) {
     return d_string.find(d_target) != string_view::npos;
@@ -396,11 +392,10 @@ std::unique_ptr<column> contains(
   return contains_fn(strings, targets, pfn, stream, mr);
 }
 
-std::unique_ptr<column> starts_with(
-  strings_column_view const& strings,
-  string_scalar const& target,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> starts_with(strings_column_view const& strings,
+                                    string_scalar const& target,
+                                    rmm::cuda_stream_view stream,
+                                    rmm::mr::device_memory_resource* mr)
 {
   auto pfn = [] __device__(string_view d_string, string_view d_target) {
     return (d_target.size_bytes() <= d_string.size_bytes()) &&
@@ -409,11 +404,10 @@ std::unique_ptr<column> starts_with(
   return contains_fn(strings, target, pfn, stream, mr);
 }
 
-std::unique_ptr<column> starts_with(
-  strings_column_view const& strings,
-  strings_column_view const& targets,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> starts_with(strings_column_view const& strings,
+                                    strings_column_view const& targets,
+                                    rmm::cuda_stream_view stream,
+                                    rmm::mr::device_memory_resource* mr)
 {
   auto pfn = [] __device__(string_view d_string, string_view d_target) {
     return (d_target.size_bytes() <= d_string.size_bytes()) &&
@@ -422,11 +416,10 @@ std::unique_ptr<column> starts_with(
   return contains_fn(strings, targets, pfn, stream, mr);
 }
 
-std::unique_ptr<column> ends_with(
-  strings_column_view const& strings,
-  string_scalar const& target,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> ends_with(strings_column_view const& strings,
+                                  string_scalar const& target,
+                                  rmm::cuda_stream_view stream,
+                                  rmm::mr::device_memory_resource* mr)
 {
   auto pfn = [] __device__(string_view d_string, string_view d_target) {
     auto const str_size = d_string.size_bytes();
@@ -438,11 +431,10 @@ std::unique_ptr<column> ends_with(
   return contains_fn(strings, target, pfn, stream, mr);
 }
 
-std::unique_ptr<column> ends_with(
-  strings_column_view const& strings,
-  strings_column_view const& targets,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> ends_with(strings_column_view const& strings,
+                                  strings_column_view const& targets,
+                                  rmm::cuda_stream_view stream,
+                                  rmm::mr::device_memory_resource* mr)
 {
   auto pfn = [] __device__(string_view d_string, string_view d_target) {
     auto const str_size = d_string.size_bytes();
diff --git a/cpp/src/strings/search/find_multiple.cu b/cpp/src/strings/search/find_multiple.cu
index 389e6eccc43..1907c0d749b 100644
--- a/cpp/src/strings/search/find_multiple.cu
+++ b/cpp/src/strings/search/find_multiple.cu
@@ -34,11 +34,10 @@
 namespace cudf {
 namespace strings {
 namespace detail {
-std::unique_ptr<column> find_multiple(
-  strings_column_view const& input,
-  strings_column_view const& targets,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> find_multiple(strings_column_view const& input,
+                                      strings_column_view const& targets,
+                                      rmm::cuda_stream_view stream,
+                                      rmm::mr::device_memory_resource* mr)
 {
   auto const strings_count = input.size();
   auto const targets_count = targets.size();
diff --git a/cpp/src/strings/search/findall.cu b/cpp/src/strings/search/findall.cu
index 07829581aa6..b5b8cab65a7 100644
--- a/cpp/src/strings/search/findall.cu
+++ b/cpp/src/strings/search/findall.cu
@@ -92,12 +92,11 @@ std::unique_ptr<column> findall_util(column_device_view const& d_strings,
 }  // namespace
 
 //
-std::unique_ptr<column> findall(
-  strings_column_view const& input,
-  std::string_view pattern,
-  regex_flags const flags,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> findall(strings_column_view const& input,
+                                std::string_view pattern,
+                                regex_flags const flags,
+                                rmm::cuda_stream_view stream,
+                                rmm::mr::device_memory_resource* mr)
 {
   auto const strings_count = input.size();
   auto const d_strings     = column_device_view::create(input.parent(), stream);
diff --git a/cpp/src/strings/split/partition.cu b/cpp/src/strings/split/partition.cu
index acdd9efbb45..09aadb78554 100644
--- a/cpp/src/strings/split/partition.cu
+++ b/cpp/src/strings/split/partition.cu
@@ -181,11 +181,10 @@ struct rpartition_fn : public partition_fn {
 
 }  // namespace
 
-std::unique_ptr<table> partition(
-  strings_column_view const& strings,
-  string_scalar const& delimiter      = string_scalar(""),
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<table> partition(strings_column_view const& strings,
+                                 string_scalar const& delimiter,
+                                 rmm::cuda_stream_view stream,
+                                 rmm::mr::device_memory_resource* mr)
 {
   CUDF_EXPECTS(delimiter.is_valid(stream), "Parameter delimiter must be valid");
   auto strings_count = strings.size();
@@ -209,11 +208,10 @@ std::unique_ptr<table> partition(
   return std::make_unique<table>(std::move(results));
 }
 
-std::unique_ptr<table> rpartition(
-  strings_column_view const& strings,
-  string_scalar const& delimiter      = string_scalar(""),
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<table> rpartition(strings_column_view const& strings,
+                                  string_scalar const& delimiter,
+                                  rmm::cuda_stream_view stream,
+                                  rmm::mr::device_memory_resource* mr)
 {
   CUDF_EXPECTS(delimiter.is_valid(stream), "Parameter delimiter must be valid");
   auto strings_count = strings.size();
diff --git a/cpp/src/strings/split/split.cu b/cpp/src/strings/split/split.cu
index 89b4c1d75c2..c11d7ad47f9 100644
--- a/cpp/src/strings/split/split.cu
+++ b/cpp/src/strings/split/split.cu
@@ -791,12 +791,11 @@ std::unique_ptr<table> whitespace_split_fn(size_type strings_count,
 
 }  // namespace
 
-std::unique_ptr<table> split(
-  strings_column_view const& strings_column,
-  string_scalar const& delimiter      = string_scalar(""),
-  size_type maxsplit                  = -1,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<table> split(strings_column_view const& strings_column,
+                             string_scalar const& delimiter,
+                             size_type maxsplit,
+                             rmm::cuda_stream_view stream,
+                             rmm::mr::device_memory_resource* mr)
 {
   CUDF_EXPECTS(delimiter.is_valid(stream), "Parameter delimiter must be valid");
 
@@ -816,12 +815,11 @@ std::unique_ptr<table> split(
     strings_column, split_tokenizer_fn{*strings_device_view, d_delimiter, max_tokens}, stream, mr);
 }
 
-std::unique_ptr<table> rsplit(
-  strings_column_view const& strings_column,
-  string_scalar const& delimiter      = string_scalar(""),
-  size_type maxsplit                  = -1,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<table> rsplit(strings_column_view const& strings_column,
+                              string_scalar const& delimiter,
+                              size_type maxsplit,
+                              rmm::cuda_stream_view stream,
+                              rmm::mr::device_memory_resource* mr)
 {
   CUDF_EXPECTS(delimiter.is_valid(stream), "Parameter delimiter must be valid");
 
diff --git a/cpp/src/strings/split/split_re.cu b/cpp/src/strings/split/split_re.cu
index 2538bab6229..a17c0a575fb 100644
--- a/cpp/src/strings/split/split_re.cu
+++ b/cpp/src/strings/split/split_re.cu
@@ -205,7 +205,8 @@ std::unique_ptr<table> split_re(strings_column_view const& input,
   auto d_strings = column_device_view::create(input.parent(), stream);
 
   // count the number of delimiters matched in each string
-  auto offsets      = count_matches(*d_strings, *d_prog, strings_count + 1, stream);
+  auto offsets = count_matches(
+    *d_strings, *d_prog, strings_count + 1, stream, rmm::mr::get_current_device_resource());
   auto offsets_view = offsets->mutable_view();
   auto d_offsets    = offsets_view.data<offset_type>();
 
diff --git a/cpp/src/strings/split/split_record.cu b/cpp/src/strings/split/split_record.cu
index 83d8d7f9203..d935ad0b1da 100644
--- a/cpp/src/strings/split/split_record.cu
+++ b/cpp/src/strings/split/split_record.cu
@@ -264,12 +264,11 @@ std::unique_ptr<column> split_record_fn(strings_column_view const& strings,
 }
 
 template <Dir dir>
-std::unique_ptr<column> split_record(
-  strings_column_view const& strings,
-  string_scalar const& delimiter      = string_scalar(""),
-  size_type maxsplit                  = -1,
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> split_record(strings_column_view const& strings,
+                                     string_scalar const& delimiter,
+                                     size_type maxsplit,
+                                     rmm::cuda_stream_view stream,
+                                     rmm::mr::device_memory_resource* mr)
 {
   CUDF_EXPECTS(delimiter.is_valid(stream), "Parameter delimiter must be valid");
 
diff --git a/cpp/src/strings/strings_column_factories.cu b/cpp/src/strings/strings_column_factories.cu
index ca30eb3f6d8..2159b67774e 100644
--- a/cpp/src/strings/strings_column_factories.cu
+++ b/cpp/src/strings/strings_column_factories.cu
@@ -56,13 +56,12 @@ std::unique_ptr<column> make_strings_column(
   return cudf::strings::detail::make_strings_column(strings.begin(), strings.end(), stream, mr);
 }
 
-std::unique_ptr<column> make_strings_column(
-  device_span<char> chars,
-  device_span<size_type> offsets,
-  size_type null_count,
-  rmm::device_buffer&& null_mask,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> make_strings_column(device_span<char> chars,
+                                            device_span<size_type> offsets,
+                                            size_type null_count,
+                                            rmm::device_buffer&& null_mask,
+                                            rmm::cuda_stream_view stream,
+                                            rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
 
diff --git a/cpp/src/strings/strip.cu b/cpp/src/strings/strip.cu
index e982050b8d6..6fb7c671a87 100644
--- a/cpp/src/strings/strip.cu
+++ b/cpp/src/strings/strip.cu
@@ -56,12 +56,11 @@ struct strip_transform_fn {
 
 }  // namespace
 
-std::unique_ptr<column> strip(
-  strings_column_view const& input,
-  side_type side                      = side_type::BOTH,
-  string_scalar const& to_strip       = string_scalar(""),
-  rmm::cuda_stream_view stream        = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> strip(strings_column_view const& input,
+                              side_type side,
+                              string_scalar const& to_strip,
+                              rmm::cuda_stream_view stream,
+                              rmm::mr::device_memory_resource* mr)
 {
   if (input.is_empty()) return make_empty_column(type_id::STRING);
 
diff --git a/cpp/src/strings/substring.cu b/cpp/src/strings/substring.cu
index e0d1bc8cf31..2acc834a1cb 100644
--- a/cpp/src/strings/substring.cu
+++ b/cpp/src/strings/substring.cu
@@ -105,13 +105,12 @@ struct substring_fn {
 }  // namespace
 
 //
-std::unique_ptr<column> slice_strings(
-  strings_column_view const& strings,
-  numeric_scalar<size_type> const& start = numeric_scalar<size_type>(0, false),
-  numeric_scalar<size_type> const& stop  = numeric_scalar<size_type>(0, false),
-  numeric_scalar<size_type> const& step  = numeric_scalar<size_type>(1),
-  rmm::cuda_stream_view stream           = cudf::get_default_stream(),
-  rmm::mr::device_memory_resource* mr    = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> slice_strings(strings_column_view const& strings,
+                                      numeric_scalar<size_type> const& start,
+                                      numeric_scalar<size_type> const& stop,
+                                      numeric_scalar<size_type> const& step,
+                                      rmm::cuda_stream_view stream,
+                                      rmm::mr::device_memory_resource* mr)
 {
   if (strings.is_empty()) return make_empty_column(type_id::STRING);
 
@@ -291,12 +290,11 @@ void compute_substring_indices(column_device_view const& d_column,
 }  // namespace
 
 //
-std::unique_ptr<column> slice_strings(
-  strings_column_view const& strings,
-  column_view const& starts_column,
-  column_view const& stops_column,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> slice_strings(strings_column_view const& strings,
+                                      column_view const& starts_column,
+                                      column_view const& stops_column,
+                                      rmm::cuda_stream_view stream,
+                                      rmm::mr::device_memory_resource* mr)
 {
   size_type strings_count = strings.size();
   if (strings_count == 0) return make_empty_column(type_id::STRING);
diff --git a/cpp/src/strings/translate.cu b/cpp/src/strings/translate.cu
index 01ecc49f10a..5b23b092cce 100644
--- a/cpp/src/strings/translate.cu
+++ b/cpp/src/strings/translate.cu
@@ -86,11 +86,10 @@ struct translate_fn {
 }  // namespace
 
 //
-std::unique_ptr<column> translate(
-  strings_column_view const& strings,
-  std::vector<std::pair<char_utf8, char_utf8>> const& chars_table,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> translate(strings_column_view const& strings,
+                                  std::vector<std::pair<char_utf8, char_utf8>> const& chars_table,
+                                  rmm::cuda_stream_view stream,
+                                  rmm::mr::device_memory_resource* mr)
 {
   if (strings.is_empty()) return make_empty_column(type_id::STRING);
 
diff --git a/cpp/src/strings/wrap.cu b/cpp/src/strings/wrap.cu
index cd0aafc3545..335908d65d1 100644
--- a/cpp/src/strings/wrap.cu
+++ b/cpp/src/strings/wrap.cu
@@ -91,11 +91,10 @@ struct execute_wrap {
 }  // namespace
 
 template <typename device_execute_functor>
-std::unique_ptr<column> wrap(
-  strings_column_view const& strings,
-  size_type width,
-  rmm::cuda_stream_view stream,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource())
+std::unique_ptr<column> wrap(strings_column_view const& strings,
+                             size_type width,
+                             rmm::cuda_stream_view stream,
+                             rmm::mr::device_memory_resource* mr)
 {
   CUDF_EXPECTS(width > 0, "Positive wrap width required");
 
diff --git a/cpp/tests/strings/array_tests.cpp b/cpp/tests/strings/array_tests.cpp
index 488184f4099..11f5c9f39aa 100644
--- a/cpp/tests/strings/array_tests.cpp
+++ b/cpp/tests/strings/array_tests.cpp
@@ -17,11 +17,11 @@
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/iterator_utilities.hpp>
 
 #include <cudf/copying.hpp>
 #include <cudf/scalar/scalar.hpp>
 #include <cudf/sorting.hpp>
-#include <cudf/strings/detail/copying.hpp>
 #include <cudf/strings/strings_column_view.hpp>
 #include <cudf/table/table_view.hpp>
 
@@ -61,14 +61,14 @@ class SliceParmsTest : public StringsColumnTest,
 TEST_P(SliceParmsTest, Slice)
 {
   std::vector<const char*> h_strings{"eee", "bb", nullptr, "", "aa", "bbb", "ééé"};
-  cudf::test::strings_column_wrapper strings(
-    h_strings.begin(),
-    h_strings.end(),
-    thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; }));
+  cudf::test::strings_column_wrapper input(
+    h_strings.begin(), h_strings.end(), cudf::test::iterators::nulls_from_nullptrs(h_strings));
 
   cudf::size_type start = 3;
   cudf::size_type end   = GetParam();
-  auto results = cudf::strings::detail::copy_slice(cudf::strings_column_view(strings), start, end);
+
+  auto scol    = cudf::slice(input, {start, end});
+  auto results = std::make_unique<cudf::column>(scol.front());
 
   cudf::test::strings_column_wrapper expected(
     h_strings.begin() + start,
@@ -81,14 +81,14 @@ TEST_P(SliceParmsTest, Slice)
 TEST_P(SliceParmsTest, SliceAllNulls)
 {
   std::vector<const char*> h_strings{nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr};
-  cudf::test::strings_column_wrapper strings(
-    h_strings.begin(),
-    h_strings.end(),
-    thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; }));
+  cudf::test::strings_column_wrapper input(
+    h_strings.begin(), h_strings.end(), cudf::test::iterators::nulls_from_nullptrs(h_strings));
 
   cudf::size_type start = 3;
   cudf::size_type end   = GetParam();
-  auto results = cudf::strings::detail::copy_slice(cudf::strings_column_view(strings), start, end);
+
+  auto scol    = cudf::slice(input, {start, end});
+  auto results = std::make_unique<cudf::column>(scol.front());
 
   cudf::test::strings_column_wrapper expected(
     h_strings.begin() + start,
@@ -101,11 +101,13 @@ TEST_P(SliceParmsTest, SliceAllNulls)
 TEST_P(SliceParmsTest, SliceAllEmpty)
 {
   std::vector<const char*> h_strings{"", "", "", "", "", "", ""};
-  cudf::test::strings_column_wrapper strings(h_strings.begin(), h_strings.end());
+  cudf::test::strings_column_wrapper input(h_strings.begin(), h_strings.end());
 
   cudf::size_type start = 3;
   cudf::size_type end   = GetParam();
-  auto results = cudf::strings::detail::copy_slice(cudf::strings_column_view(strings), start, end);
+
+  auto scol    = cudf::slice(input, {start, end});
+  auto results = std::make_unique<cudf::column>(scol.front());
 
   cudf::test::strings_column_wrapper expected(h_strings.begin() + start, h_strings.begin() + end);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
@@ -119,8 +121,8 @@ TEST_F(StringsColumnTest, SliceZeroSizeStringsColumn)
 {
   cudf::column_view zero_size_strings_column(
     cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0);
-  auto strings_view = cudf::strings_column_view(zero_size_strings_column);
-  auto results      = cudf::strings::detail::copy_slice(strings_view, 1, 2);
+  auto scol    = cudf::slice(zero_size_strings_column, {0, 0});
+  auto results = std::make_unique<cudf::column>(scol.front());
   cudf::test::expect_column_empty(results->view());
 }
 
@@ -128,18 +130,14 @@ TEST_F(StringsColumnTest, Gather)
 {
   std::vector<const char*> h_strings{"eee", "bb", nullptr, "", "aa", "bbb", "ééé"};
   cudf::test::strings_column_wrapper strings(
-    h_strings.begin(),
-    h_strings.end(),
-    thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; }));
+    h_strings.begin(), h_strings.end(), cudf::test::iterators::nulls_from_nullptrs(h_strings));
 
   cudf::test::fixed_width_column_wrapper<int32_t> gather_map{{4, 1}};
   auto results = cudf::gather(cudf::table_view{{strings}}, gather_map)->release();
 
   std::vector<const char*> h_expected{"aa", "bb"};
   cudf::test::strings_column_wrapper expected(
-    h_expected.begin(),
-    h_expected.end(),
-    thrust::make_transform_iterator(h_expected.begin(), [](auto str) { return str != nullptr; }));
+    h_expected.begin(), h_expected.end(), cudf::test::iterators::nulls_from_nullptrs(h_expected));
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(results.front()->view(), expected);
 }
 
diff --git a/cpp/tests/strings/concatenate_tests.cpp b/cpp/tests/strings/concatenate_tests.cpp
index 387f0f5c997..e4f2f7ca62c 100644
--- a/cpp/tests/strings/concatenate_tests.cpp
+++ b/cpp/tests/strings/concatenate_tests.cpp
@@ -19,7 +19,7 @@
 #include <cudf_test/column_wrapper.hpp>
 
 #include <cudf/column/column_factories.hpp>
-#include <cudf/strings/detail/concatenate.hpp>
+#include <cudf/concatenate.hpp>
 #include <cudf/strings/strings_column_view.hpp>
 
 #include <vector>
@@ -60,7 +60,7 @@ TEST_F(StringsConcatenateTest, Concatenate)
   strings_columns.push_back(strings2);
   strings_columns.push_back(strings3);
 
-  auto results = cudf::strings::detail::concatenate(strings_columns, cudf::get_default_stream());
+  auto results = cudf::concatenate(strings_columns);
 
   cudf::test::strings_column_wrapper expected(h_strings.begin(), h_strings.end());
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
@@ -74,7 +74,7 @@ TEST_F(StringsConcatenateTest, ZeroSizeStringsColumns)
   strings_columns.push_back(zero_size_strings_column);
   strings_columns.push_back(zero_size_strings_column);
   strings_columns.push_back(zero_size_strings_column);
-  auto results = cudf::strings::detail::concatenate(strings_columns, cudf::get_default_stream());
+  auto results = cudf::concatenate(strings_columns);
   cudf::test::expect_column_empty(results->view());
 }
 
@@ -107,6 +107,6 @@ TEST_F(StringsConcatenateTest, ZeroSizeStringsPlusNormal)
                                               h_strings.data() + h_strings.size());
   strings_columns.push_back(strings1);
 
-  auto results = cudf::strings::detail::concatenate(strings_columns, cudf::get_default_stream());
+  auto results = cudf::concatenate(strings_columns);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, strings1);
 }
diff --git a/cpp/tests/strings/fill_tests.cpp b/cpp/tests/strings/fill_tests.cpp
index ed731fe39b4..c3a1710bb83 100644
--- a/cpp/tests/strings/fill_tests.cpp
+++ b/cpp/tests/strings/fill_tests.cpp
@@ -17,13 +17,11 @@
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/iterator_utilities.hpp>
 
 #include <cudf/column/column_factories.hpp>
+#include <cudf/filling.hpp>
 #include <cudf/scalar/scalar.hpp>
-#include <cudf/strings/detail/fill.hpp>
-#include <cudf/strings/strings_column_view.hpp>
-
-#include <thrust/iterator/transform_iterator.h>
 
 #include <vector>
 
@@ -33,48 +31,37 @@ struct StringsFillTest : public cudf::test::BaseFixture {
 TEST_F(StringsFillTest, Fill)
 {
   std::vector<const char*> h_strings{"eee", "bb", nullptr, "", "aa", "bbb", "ééé"};
-  cudf::test::strings_column_wrapper strings(
-    h_strings.begin(),
-    h_strings.end(),
-    thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; }));
-  cudf::strings_column_view view(strings);
+  cudf::test::strings_column_wrapper input(
+    h_strings.begin(), h_strings.end(), cudf::test::iterators::nulls_from_nullptrs(h_strings));
+
   {
-    auto results = cudf::strings::detail::fill(
-      view, 1, 5, cudf::string_scalar("zz"), cudf::get_default_stream());
+    auto results = cudf::fill(input, 1, 5, cudf::string_scalar("zz"));
 
     std::vector<const char*> h_expected{"eee", "zz", "zz", "zz", "zz", "bbb", "ééé"};
     cudf::test::strings_column_wrapper expected(
-      h_expected.begin(),
-      h_expected.end(),
-      thrust::make_transform_iterator(h_expected.begin(), [](auto str) { return str != nullptr; }));
+      h_expected.begin(), h_expected.end(), cudf::test::iterators::nulls_from_nullptrs(h_expected));
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
   }
   {
-    auto results = cudf::strings::detail::fill(
-      view, 2, 4, cudf::string_scalar("", false), cudf::get_default_stream());
+    auto results = cudf::fill(input, 2, 4, cudf::string_scalar("", false));
 
     std::vector<const char*> h_expected{"eee", "bb", nullptr, nullptr, "aa", "bbb", "ééé"};
     cudf::test::strings_column_wrapper expected(
-      h_expected.begin(),
-      h_expected.end(),
-      thrust::make_transform_iterator(h_expected.begin(), [](auto str) { return str != nullptr; }));
+      h_expected.begin(), h_expected.end(), cudf::test::iterators::nulls_from_nullptrs(h_expected));
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
   }
   {
-    auto results = cudf::strings::detail::fill(
-      view, 5, 5, cudf::string_scalar("zz"), cudf::get_default_stream());
-    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, view.parent());
+    auto results = cudf::fill(input, 5, 5, cudf::string_scalar("zz"));
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, input);
   }
   {
-    auto results =
-      cudf::strings::detail::fill(view, 0, 7, cudf::string_scalar(""), cudf::get_default_stream());
+    auto results = cudf::fill(input, 0, 7, cudf::string_scalar(""));
     cudf::test::strings_column_wrapper expected({"", "", "", "", "", "", ""},
                                                 {1, 1, 1, 1, 1, 1, 1});
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
   }
   {
-    auto results = cudf::strings::detail::fill(
-      view, 0, 7, cudf::string_scalar("", false), cudf::get_default_stream());
+    auto results = cudf::fill(input, 0, 7, cudf::string_scalar("", false));
     cudf::test::strings_column_wrapper expected({"", "", "", "", "", "", ""},
                                                 {0, 0, 0, 0, 0, 0, 0});
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
@@ -85,27 +72,16 @@ TEST_F(StringsFillTest, ZeroSizeStringsColumns)
 {
   cudf::column_view zero_size_strings_column(
     cudf::data_type{cudf::type_id::STRING}, 0, nullptr, nullptr, 0);
-  auto results = cudf::strings::detail::fill(cudf::strings_column_view(zero_size_strings_column),
-                                             0,
-                                             1,
-                                             cudf::string_scalar(""),
-                                             cudf::get_default_stream());
+  auto results = cudf::fill(zero_size_strings_column, 0, 0, cudf::string_scalar(""));
   cudf::test::expect_column_empty(results->view());
 }
 
 TEST_F(StringsFillTest, FillRangeError)
 {
   std::vector<const char*> h_strings{"eee", "bb", nullptr, "", "aa", "bbb", "ééé"};
-  cudf::test::strings_column_wrapper strings(
-    h_strings.begin(),
-    h_strings.end(),
-    thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; }));
-  cudf::strings_column_view view(strings);
+  cudf::test::strings_column_wrapper input(
+    h_strings.begin(), h_strings.end(), cudf::test::iterators::nulls_from_nullptrs(h_strings));
 
-  EXPECT_THROW(
-    cudf::strings::detail::fill(view, 5, 1, cudf::string_scalar(""), cudf::get_default_stream()),
-    cudf::logic_error);
-  EXPECT_THROW(
-    cudf::strings::detail::fill(view, 5, 9, cudf::string_scalar(""), cudf::get_default_stream()),
-    cudf::logic_error);
+  EXPECT_THROW(cudf::fill(input, 5, 1, cudf::string_scalar("")), cudf::logic_error);
+  EXPECT_THROW(cudf::fill(input, 5, 9, cudf::string_scalar("")), cudf::logic_error);
 }
diff --git a/cpp/tests/strings/replace_tests.cpp b/cpp/tests/strings/replace_tests.cpp
index cd39c1e088a..da0667f54cf 100644
--- a/cpp/tests/strings/replace_tests.cpp
+++ b/cpp/tests/strings/replace_tests.cpp
@@ -17,6 +17,7 @@
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/iterator_utilities.hpp>
 
 #include <cudf/column/column.hpp>
 #include <cudf/strings/detail/replace.hpp>
@@ -50,8 +51,8 @@ struct StringsReplaceTest : public cudf::test::BaseFixture {
 
 TEST_F(StringsReplaceTest, Replace)
 {
-  auto strings      = build_corpus();
-  auto strings_view = cudf::strings_column_view(strings);
+  auto input        = build_corpus();
+  auto strings_view = cudf::strings_column_view(input);
   // replace all occurrences of 'the ' with '++++ '
   std::vector<const char*> h_expected{"++++ quick brown fox jumps over ++++ lazy dog",
                                       "++++ fat cat lays next to ++++ other accénted cat",
@@ -61,24 +62,29 @@ TEST_F(StringsReplaceTest, Replace)
                                       "",
                                       nullptr};
   cudf::test::strings_column_wrapper expected(
-    h_expected.begin(),
-    h_expected.end(),
-    thrust::make_transform_iterator(h_expected.begin(), [](auto str) { return str != nullptr; }));
+    h_expected.begin(), h_expected.end(), cudf::test::iterators::nulls_from_nullptrs(h_expected));
+
+  auto stream = cudf::get_default_stream();
+  auto mr     = rmm::mr::get_current_device_resource();
+
   auto results =
     cudf::strings::replace(strings_view, cudf::string_scalar("the "), cudf::string_scalar("++++ "));
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
   results = cudf::strings::detail::replace<algorithm::CHAR_PARALLEL>(
-    strings_view, cudf::string_scalar("the "), cudf::string_scalar("++++ "));
+    strings_view, cudf::string_scalar("the "), cudf::string_scalar("++++ "), -1, stream, mr);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
   results = cudf::strings::detail::replace<algorithm::ROW_PARALLEL>(
-    strings_view, cudf::string_scalar("the "), cudf::string_scalar("++++ "));
+    strings_view, cudf::string_scalar("the "), cudf::string_scalar("++++ "), -1, stream, mr);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
 }
 
 TEST_F(StringsReplaceTest, ReplaceReplLimit)
 {
-  auto strings      = build_corpus();
-  auto strings_view = cudf::strings_column_view(strings);
+  auto input        = build_corpus();
+  auto strings_view = cudf::strings_column_view(input);
+  auto stream       = cudf::get_default_stream();
+  auto mr           = rmm::mr::get_current_device_resource();
+
   // only remove the first occurrence of 'the '
   std::vector<const char*> h_expected{"quick brown fox jumps over the lazy dog",
                                       "fat cat lays next to the other accénted cat",
@@ -88,23 +94,21 @@ TEST_F(StringsReplaceTest, ReplaceReplLimit)
                                       "",
                                       nullptr};
   cudf::test::strings_column_wrapper expected(
-    h_expected.begin(),
-    h_expected.end(),
-    thrust::make_transform_iterator(h_expected.begin(), [](auto str) { return str != nullptr; }));
+    h_expected.begin(), h_expected.end(), cudf::test::iterators::nulls_from_nullptrs(h_expected));
   auto results =
     cudf::strings::replace(strings_view, cudf::string_scalar("the "), cudf::string_scalar(""), 1);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
   results = cudf::strings::detail::replace<algorithm::CHAR_PARALLEL>(
-    strings_view, cudf::string_scalar("the "), cudf::string_scalar(""), 1);
+    strings_view, cudf::string_scalar("the "), cudf::string_scalar(""), 1, stream, mr);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
   results = cudf::strings::detail::replace<algorithm::ROW_PARALLEL>(
-    strings_view, cudf::string_scalar("the "), cudf::string_scalar(""), 1);
+    strings_view, cudf::string_scalar("the "), cudf::string_scalar(""), 1, stream, mr);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
 }
 
 TEST_F(StringsReplaceTest, ReplaceReplLimitInputSliced)
 {
-  auto strings = build_corpus();
+  auto input = build_corpus();
   // replace first two occurrences of ' ' with '--'
   std::vector<const char*> h_expected{"the--quick--brown fox jumps over the lazy dog",
                                       "the--fat--cat lays next to the other accénted cat",
@@ -114,11 +118,11 @@ TEST_F(StringsReplaceTest, ReplaceReplLimitInputSliced)
                                       "",
                                       nullptr};
   cudf::test::strings_column_wrapper expected(
-    h_expected.begin(),
-    h_expected.end(),
-    thrust::make_transform_iterator(h_expected.begin(), [](auto str) { return str != nullptr; }));
+    h_expected.begin(), h_expected.end(), cudf::test::iterators::nulls_from_nullptrs(h_expected));
+  auto stream = cudf::get_default_stream();
+  auto mr     = rmm::mr::get_current_device_resource();
   std::vector<cudf::size_type> slice_indices{0, 2, 2, 3, 3, 7};
-  auto sliced_strings  = cudf::slice(strings, slice_indices);
+  auto sliced_strings  = cudf::slice(input, slice_indices);
   auto sliced_expected = cudf::slice(expected, slice_indices);
   for (size_t i = 0; i < sliced_strings.size(); ++i) {
     auto strings_view = cudf::strings_column_view(sliced_strings[i]);
@@ -126,10 +130,10 @@ TEST_F(StringsReplaceTest, ReplaceReplLimitInputSliced)
       cudf::strings::replace(strings_view, cudf::string_scalar(" "), cudf::string_scalar("--"), 2);
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, sliced_expected[i]);
     results = cudf::strings::detail::replace<algorithm::CHAR_PARALLEL>(
-      strings_view, cudf::string_scalar(" "), cudf::string_scalar("--"), 2);
+      strings_view, cudf::string_scalar(" "), cudf::string_scalar("--"), 2, stream, mr);
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, sliced_expected[i]);
     results = cudf::strings::detail::replace<algorithm::ROW_PARALLEL>(
-      strings_view, cudf::string_scalar(" "), cudf::string_scalar("--"), 2);
+      strings_view, cudf::string_scalar(" "), cudf::string_scalar("--"), 2, stream, mr);
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, sliced_expected[i]);
   }
 }
@@ -139,9 +143,9 @@ TEST_F(StringsReplaceTest, ReplaceTargetOverlap)
   auto corpus      = build_corpus();
   auto corpus_view = cudf::strings_column_view(corpus);
   // replace all occurrences of 'the ' with '+++++++ '
-  auto strings = cudf::strings::replace(
+  auto input = cudf::strings::replace(
     corpus_view, cudf::string_scalar("the "), cudf::string_scalar("++++++++ "));
-  auto strings_view = cudf::strings_column_view(*strings);
+  auto strings_view = cudf::strings_column_view(*input);
   // replace all occurrences of '+++' with 'plus '
   std::vector<const char*> h_expected{
     "plus plus ++ quick brown fox jumps over plus plus ++ lazy dog",
@@ -152,60 +156,71 @@ TEST_F(StringsReplaceTest, ReplaceTargetOverlap)
     "",
     nullptr};
   cudf::test::strings_column_wrapper expected(
-    h_expected.begin(),
-    h_expected.end(),
-    thrust::make_transform_iterator(h_expected.begin(), [](auto str) { return str != nullptr; }));
+    h_expected.begin(), h_expected.end(), cudf::test::iterators::nulls_from_nullptrs(h_expected));
+
+  auto stream = cudf::get_default_stream();
+  auto mr     = rmm::mr::get_current_device_resource();
+
   auto results =
     cudf::strings::replace(strings_view, cudf::string_scalar("+++"), cudf::string_scalar("plus "));
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+
   results = cudf::strings::detail::replace<algorithm::CHAR_PARALLEL>(
-    strings_view, cudf::string_scalar("+++"), cudf::string_scalar("plus "));
+    strings_view, cudf::string_scalar("+++"), cudf::string_scalar("plus "), -1, stream, mr);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
   results = cudf::strings::detail::replace<algorithm::ROW_PARALLEL>(
-    strings_view, cudf::string_scalar("+++"), cudf::string_scalar("plus "));
+    strings_view, cudf::string_scalar("+++"), cudf::string_scalar("plus "), -1, stream, mr);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
 }
 
 TEST_F(StringsReplaceTest, ReplaceTargetOverlapsStrings)
 {
-  auto strings      = build_corpus();
-  auto strings_view = cudf::strings_column_view(strings);
+  auto input        = build_corpus();
+  auto strings_view = cudf::strings_column_view(input);
+  auto stream       = cudf::get_default_stream();
+  auto mr           = rmm::mr::get_current_device_resource();
+
   // replace all occurrences of 'dogthe' with '+'
   // should not replace anything unless it incorrectly matches across a string boundary
   auto results =
     cudf::strings::replace(strings_view, cudf::string_scalar("dogthe"), cudf::string_scalar("+"));
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, strings);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, input);
   results = cudf::strings::detail::replace<algorithm::CHAR_PARALLEL>(
-    strings_view, cudf::string_scalar("dogthe"), cudf::string_scalar("+"));
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, strings);
+    strings_view, cudf::string_scalar("dogthe"), cudf::string_scalar("+"), -1, stream, mr);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, input);
   results = cudf::strings::detail::replace<algorithm::ROW_PARALLEL>(
-    strings_view, cudf::string_scalar("dogthe"), cudf::string_scalar("+"));
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, strings);
+    strings_view, cudf::string_scalar("dogthe"), cudf::string_scalar("+"), -1, stream, mr);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, input);
 }
 
 TEST_F(StringsReplaceTest, ReplaceNullInput)
 {
   std::vector<const char*> h_null_strings(128);
-  auto strings = cudf::test::strings_column_wrapper(
+  auto input = cudf::test::strings_column_wrapper(
     h_null_strings.begin(), h_null_strings.end(), thrust::make_constant_iterator(false));
-  auto strings_view = cudf::strings_column_view(strings);
+  auto strings_view = cudf::strings_column_view(input);
+  auto stream       = cudf::get_default_stream();
+  auto mr           = rmm::mr::get_current_device_resource();
   // replace all occurrences of '+' with ''
   // should not replace anything as input is all null
   auto results =
     cudf::strings::replace(strings_view, cudf::string_scalar("+"), cudf::string_scalar(""));
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, strings);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, input);
   results = cudf::strings::detail::replace<algorithm::CHAR_PARALLEL>(
-    strings_view, cudf::string_scalar("+"), cudf::string_scalar(""));
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, strings);
+    strings_view, cudf::string_scalar("+"), cudf::string_scalar(""), -1, stream, mr);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, input);
   results = cudf::strings::detail::replace<algorithm::ROW_PARALLEL>(
-    strings_view, cudf::string_scalar("+"), cudf::string_scalar(""));
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, strings);
+    strings_view, cudf::string_scalar("+"), cudf::string_scalar(""), -1, stream, mr);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, input);
 }
 
 TEST_F(StringsReplaceTest, ReplaceEndOfString)
 {
-  auto strings      = build_corpus();
-  auto strings_view = cudf::strings_column_view(strings);
+  auto input        = build_corpus();
+  auto strings_view = cudf::strings_column_view(input);
+  auto stream       = cudf::get_default_stream();
+  auto mr           = rmm::mr::get_current_device_resource();
+
   // replace all occurrences of 'in' with  ' '
   std::vector<const char*> h_expected{"the quick brown fox jumps over the lazy dog",
                                       "the fat cat lays next to the other accénted cat",
@@ -216,20 +231,18 @@ TEST_F(StringsReplaceTest, ReplaceEndOfString)
                                       nullptr};
 
   cudf::test::strings_column_wrapper expected(
-    h_expected.begin(),
-    h_expected.end(),
-    thrust::make_transform_iterator(h_expected.begin(), [](auto str) { return str != nullptr; }));
+    h_expected.begin(), h_expected.end(), cudf::test::iterators::nulls_from_nullptrs(h_expected));
 
   auto results =
     cudf::strings::replace(strings_view, cudf::string_scalar("in"), cudf::string_scalar(" "));
   cudf::test::expect_columns_equal(*results, expected);
 
   results = cudf::strings::detail::replace<cudf::strings::detail::replace_algorithm::CHAR_PARALLEL>(
-    strings_view, cudf::string_scalar("in"), cudf::string_scalar(" "));
+    strings_view, cudf::string_scalar("in"), cudf::string_scalar(" "), -1, stream, mr);
   cudf::test::expect_columns_equal(*results, expected);
 
   results = cudf::strings::detail::replace<cudf::strings::detail::replace_algorithm::ROW_PARALLEL>(
-    strings_view, cudf::string_scalar("in"), cudf::string_scalar(" "));
+    strings_view, cudf::string_scalar("in"), cudf::string_scalar(" "), -1, stream, mr);
   cudf::test::expect_columns_equal(*results, expected);
 }
 
diff --git a/python/strings_udf/cpp/src/strings/udf/udf_apis.cu b/python/strings_udf/cpp/src/strings/udf/udf_apis.cu
index 7927740fd49..b4d5014d9e0 100644
--- a/python/strings_udf/cpp/src/strings/udf/udf_apis.cu
+++ b/python/strings_udf/cpp/src/strings/udf/udf_apis.cu
@@ -58,7 +58,7 @@ std::unique_ptr<rmm::device_buffer> to_string_view_array(cudf::column_view const
 {
   return std::make_unique<rmm::device_buffer>(
     std::move(cudf::strings::detail::create_string_vector_from_column(
-                cudf::strings_column_view(input), stream)
+                cudf::strings_column_view(input), stream, rmm::mr::get_current_device_resource())
                 .release()));
 }
 

From b1c2520a017e0ecdb10697255b7710d0fada11dd Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Fri, 4 Nov 2022 12:29:12 -0500
Subject: [PATCH 124/202] Remove overflow error during decimal binops (#12063)

Fixes: #11337

- [x] This PR removes the raising of an overflow error and instead lets the data overflow, similar to what we do with other numeric dtypes.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/12063
---
 python/cudf/cudf/core/column/decimal.py | 10 +++++++++-
 python/cudf/cudf/tests/test_decimal.py  |  8 +++++++-
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py
index 0beb07bb591..5ee9024a0d8 100644
--- a/python/cudf/cudf/core/column/decimal.py
+++ b/python/cudf/cudf/core/column/decimal.py
@@ -399,4 +399,12 @@ def _get_decimal_type(lhs_dtype, rhs_dtype, op):
                 # to try the next dtype
                 continue
 
-    raise OverflowError("Maximum supported decimal type is Decimal128")
+    # Instead of raising an overflow error, we create a `Decimal128Dtype`
+    # with max possible scale & precision, see example of this demonstration
+    # here: https://learn.microsoft.com/en-us/sql/t-sql/data-types/
+    # precision-scale-and-length-transact-sql?view=sql-server-ver16#examples
+    scale = min(
+        scale, cudf.Decimal128Dtype.MAX_PRECISION - (precision - scale)
+    )
+    precision = min(cudf.Decimal128Dtype.MAX_PRECISION, max_precision)
+    return cudf.Decimal128Dtype(precision=precision, scale=scale)
diff --git a/python/cudf/cudf/tests/test_decimal.py b/python/cudf/cudf/tests/test_decimal.py
index c37381a3af9..c7174adf342 100644
--- a/python/cudf/cudf/tests/test_decimal.py
+++ b/python/cudf/cudf/tests/test_decimal.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021, NVIDIA CORPORATION.
+# Copyright (c) 2021-2022, NVIDIA CORPORATION.
 
 import decimal
 from decimal import Decimal
@@ -377,3 +377,9 @@ def test_decimal_invalid_precision():
 
     with pytest.raises(pa.ArrowInvalid):
         _ = cudf.Series([Decimal("300")], dtype=cudf.Decimal64Dtype(2, 1))
+
+
+def test_decimal_overflow():
+    s = cudf.Series([Decimal("0.0009384233522166997927180531650178250")])
+    result = s * s
+    assert_eq(cudf.Decimal128Dtype(precision=38, scale=37), result.dtype)

From e788f368c70e3092673dfced0b6213b47840c0a3 Mon Sep 17 00:00:00 2001
From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com>
Date: Fri, 4 Nov 2022 23:06:09 +0530
Subject: [PATCH 125/202] Fixes List offset bug in Nested JSON reader (#12060)

Fixes a bug in the condition that writes the end offset of a list's last item.
If a list row is followed by an empty list in the next row, the previous row's end is not written to the offsets.

Authors:
  - Karthikeyan (https://github.com/karthikeyann)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/12060
---
 cpp/src/io/json/json_column.cu      |  5 ++--
 python/cudf/cudf/tests/test_json.py | 46 +++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu
index cee023a1061..0ac3efb407e 100644
--- a/cpp/src/io/json/json_column.cu
+++ b/cpp/src/io/json/json_column.cu
@@ -525,14 +525,15 @@ void make_device_json_column(device_span<SymbolT const> input,
       auto parent_node_id = ordered_parent_node_ids[i];
       if (parent_node_id != parent_node_sentinel and node_categories[parent_node_id] == NC_LIST) {
         // unique item
-        if (i == 0 ||
+        if (i == 0 or
             (col_ids[i - 1] != col_ids[i] or ordered_parent_node_ids[i - 1] != parent_node_id)) {
           // scatter to list_offset
           d_columns_data[original_col_ids[parent_node_id]]
             .child_offsets[row_offsets[parent_node_id]] = ordered_row_offsets[i];
         }
         // TODO: verify if this code is right. check with more test cases.
-        if (i == num_nodes - 1 || (col_ids[i] != col_ids[i + 1])) {
+        if (i == num_nodes - 1 or
+            (col_ids[i] != col_ids[i + 1] or ordered_parent_node_ids[i + 1] != parent_node_id)) {
           // last value of list child_offset is its size.
           d_columns_data[original_col_ids[parent_node_id]]
             .child_offsets[row_offsets[parent_node_id] + 1] = ordered_row_offsets[i] + 1;
diff --git a/python/cudf/cudf/tests/test_json.py b/python/cudf/cudf/tests/test_json.py
index 00d6e0b2899..34aff2c34fe 100644
--- a/python/cudf/cudf/tests/test_json.py
+++ b/python/cudf/cudf/tests/test_json.py
@@ -897,3 +897,49 @@ def test_json_dtypes_nested_data():
         pdf, schema=df.to_arrow().schema, safe=False
     )
     assert df.to_arrow().equals(pa_table_pdf)
+
+
+@pytest.mark.parametrize(
+    "tag, data",
+    [
+        (
+            "normal",
+            """\
+{"a": 1, "b": 2}
+{"a": 3, "b": 4}""",
+        ),
+        (
+            "multiple",
+            """\
+    { "a": { "y" : 6}, "b" : [1, 2, 3], "c": 11 }
+    { "a": { "y" : 6}, "b" : [4, 5   ], "c": 12 }
+    { "a": { "y" : 6}, "b" : [6      ], "c": 13 }
+    { "a": { "y" : 6}, "b" : [7      ], "c": 14 }""",
+        ),
+        (
+            "reordered",
+            """\
+    { "a": { "y" : 6}, "b" : [1, 2, 3], "c": 11 }
+    { "a": { "y" : 6}, "c": 12 , "b" : [4, 5   ]}
+    { "b" : [6      ],  "a": { "y" : 6}, "c": 13}
+    { "c" : 14, "a": { "y" : 6}, "b" : [7      ]}
+""",
+        ),
+        (
+            "missing",
+            """
+            { "a": { "y" : 6}, "b" : [1, 2, 3], "c": 11 }
+            { "a": { "y" : 6}, "b" : [4, 5   ]}
+            { "a": { "y" : 6}, "c": 13 }
+            { "a": { "y" : 6}, "b" : [7      ], "c": 14 }
+        """,
+        ),
+    ],
+)
+def test_order_nested_json_reader(tag, data):
+    expected = cudf.read_json(StringIO(data), engine="pandas", lines=True)
+    target = cudf.read_json(
+        StringIO(data), engine="cudf_experimental", lines=True
+    )
+
+    assert_eq(expected, target, check_dtype=True)

From a3e9c1c0cc8da4fcabbfc0f173cba92a69769a60 Mon Sep 17 00:00:00 2001
From: Jim Brennan <jimb@nvidia.com>
Date: Fri, 4 Nov 2022 14:32:48 -0500
Subject: [PATCH 126/202] Mark nvcomp zstd compression stable (#12059)

NVCOMP zstd compression was added in 22.10, but marked experimental, meaning you have to define the environment variable `LIBCUDF_NVCOMP_POLICY=ALWAYS` to enable it.  After completing validation testing using the spark rapids plugin as documented here: https://github.com/NVIDIA/spark-rapids/issues/3037, we believe that we can now change the zstd compression status to stable, which will enable it in cudf by default.  `LIBCUDF_NVCOMP_POLICY=STABLE` is the default value.

Authors:
  - Jim Brennan (https://github.com/jbrennan333)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - David Wendt (https://github.com/davidwendt)
  - Vukasin Milovanovic (https://github.com/vuule)

URL: https://github.com/rapidsai/cudf/pull/12059
---
 cpp/src/io/comp/nvcomp_adapter.cpp | 2 +-
 docs/cudf/source/user_guide/io.md  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/src/io/comp/nvcomp_adapter.cpp b/cpp/src/io/comp/nvcomp_adapter.cpp
index fd794b2e66c..20912e9209f 100644
--- a/cpp/src/io/comp/nvcomp_adapter.cpp
+++ b/cpp/src/io/comp/nvcomp_adapter.cpp
@@ -398,7 +398,7 @@ bool is_compression_enabled(compression_type compression)
       return false;
     case compression_type::SNAPPY: return detail::nvcomp_integration::is_stable_enabled();
     case compression_type::ZSTD:
-      return NVCOMP_HAS_ZSTD_COMP and detail::nvcomp_integration::is_all_enabled();
+      return NVCOMP_HAS_ZSTD_COMP and detail::nvcomp_integration::is_stable_enabled();
     default: return false;
   }
   return false;
diff --git a/docs/cudf/source/user_guide/io.md b/docs/cudf/source/user_guide/io.md
index 9099a761f2c..3a803953502 100644
--- a/docs/cudf/source/user_guide/io.md
+++ b/docs/cudf/source/user_guide/io.md
@@ -170,7 +170,7 @@ If no value is set, behavior will be the same as the "STABLE" option.
     +=======================+========+========+==============+==============+=========+========+==============+==============+========+
     | Snappy                | ❌     | ❌     | Stable       | Stable       | ❌      | ❌     | Stable       | Stable       | ❌     |
     +-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------+
-    | ZSTD                  | ❌     | ❌     | Experimental | Experimental | ❌      | ❌     | Experimental | Experimental | ❌     |
+    | ZSTD                  | ❌     | ❌     | Stable       | Stable       | ❌      | ❌     | Stable       | Stable       | ❌     |
     +-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------+
     | DEFLATE               | ❌     | ❌     | ❌           | ❌           | ❌      | ❌     | Experimental | Experimental | ❌     |
     +-----------------------+--------+--------+--------------+--------------+---------+--------+--------------+--------------+--------+

From 6e13139fe3a610b8381cf5926c8a4f224c54d613 Mon Sep 17 00:00:00 2001
From: Alessandro Bellina <abellina@nvidia.com>
Date: Fri, 4 Nov 2022 16:53:49 -0500
Subject: [PATCH 127/202] Add debug-only onAllocated/onDeallocated to
 RmmEventHandler (#12054)

This adds `onAllocated` and `onDeallocated` to `RmmEventHandler` as debug callbacks. If the event handler is installed with debug enabled (in `Rmm.setEventHandler`) these callbacks will be invoked when an allocation or deallocation finishes.

It also fixes a bug with https://github.com/rapidsai/cudf/pull/11950 where the initial allocated amount was not getting set appropriately. It was getting set to 0, but instead it should be set to the new initial value/maximum.

Closes https://github.com/rapidsai/cudf/issues/11949.

Authors:
  - Alessandro Bellina (https://github.com/abellina)

Approvers:
  - Jason Lowe (https://github.com/jlowe)

URL: https://github.com/rapidsai/cudf/pull/12054
---
 java/src/main/java/ai/rapids/cudf/Rmm.java    | 25 ++++-
 .../java/ai/rapids/cudf/RmmEventHandler.java  | 12 +++
 java/src/main/native/src/RmmJni.cpp           | 97 +++++++++++++++----
 .../src/test/java/ai/rapids/cudf/RmmTest.java | 26 ++++-
 4 files changed, 138 insertions(+), 22 deletions(-)

diff --git a/java/src/main/java/ai/rapids/cudf/Rmm.java b/java/src/main/java/ai/rapids/cudf/Rmm.java
index 0b825937815..a8ca8a2c4d3 100755
--- a/java/src/main/java/ai/rapids/cudf/Rmm.java
+++ b/java/src/main/java/ai/rapids/cudf/Rmm.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -185,14 +185,30 @@ public static void resetScopedMaximumBytesAllocated() {
    * @throws RmmException if an active handler is already set
    */
   public static void setEventHandler(RmmEventHandler handler) throws RmmException {
+    setEventHandler(handler, false);
+  }
+
+  /**
+   * Sets the event handler to be called on RMM events (e.g.: allocation failure) and
+   * optionally enable debug mode (callbacks on every allocate and deallocate)
+   *
+   * NOTE: Only enable debug mode when necessary, as code will run much slower!
+   *
+   * @param handler event handler to invoke on RMM events or null to clear an existing handler
+   * @param enableDebug if true enable debug callbacks in RmmEventHandler
+   *                    (onAllocated, onDeallocated)
+   * @throws RmmException if an active handler is already set
+   */
+  public static void setEventHandler(RmmEventHandler handler,
+                                     boolean enableDebug) throws RmmException {
     long[] allocThresholds = (handler != null) ? sortThresholds(handler.getAllocThresholds()) : null;
     long[] deallocThresholds = (handler != null) ? sortThresholds(handler.getDeallocThresholds()) : null;
-    setEventHandlerInternal(handler, allocThresholds, deallocThresholds);
+    setEventHandlerInternal(handler, allocThresholds, deallocThresholds, enableDebug);
   }
 
   /** Clears the active RMM event handler if one is set. */
   public static void clearEventHandler() throws RmmException {
-    setEventHandlerInternal(null, null, null);
+    setEventHandlerInternal(null, null, null, false);
   }
 
   private static long[] sortThresholds(long[] thresholds) {
@@ -300,7 +316,8 @@ public static DeviceMemoryBuffer alloc(long size, Cuda.Stream stream) {
   static native void freeDeviceBuffer(long rmmBufferAddress) throws RmmException;
 
   static native void setEventHandlerInternal(RmmEventHandler handler,
-      long[] allocThresholds, long[] deallocThresholds) throws RmmException;
+      long[] allocThresholds, long[] deallocThresholds,
+      boolean enableDebug) throws RmmException;
 
   /**
    * Allocate device memory using `cudaMalloc` and return a pointer to device memory.
diff --git a/java/src/main/java/ai/rapids/cudf/RmmEventHandler.java b/java/src/main/java/ai/rapids/cudf/RmmEventHandler.java
index 19707b85bcb..347ef471a15 100644
--- a/java/src/main/java/ai/rapids/cudf/RmmEventHandler.java
+++ b/java/src/main/java/ai/rapids/cudf/RmmEventHandler.java
@@ -32,6 +32,18 @@ default boolean onAllocFailure(long sizeRequested) {
         "Unexpected invocation of deprecated onAllocFailure without retry count.");
   }
 
+  /**
+   * Invoked after every memory allocation when debug mode is enabled.
+   * @param size number of bytes allocated
+   */
+  default void onAllocated(long size) {}
+
+  /**
+   * Invoked after every memory deallocation when debug mode is enabled.
+   * @param size number of bytes deallocated
+   */
+  default void onDeallocated(long size) {}
+
   /**
    * Invoked on a memory allocation failure.
    * @param sizeRequested number of bytes that failed to allocate
diff --git a/java/src/main/native/src/RmmJni.cpp b/java/src/main/native/src/RmmJni.cpp
index 529345b6bd8..b12f1ed0841 100644
--- a/java/src/main/native/src/RmmJni.cpp
+++ b/java/src/main/native/src/RmmJni.cpp
@@ -90,11 +90,14 @@ class tracking_resource_adaptor final : public base_tracking_resource_adaptor {
 
   void reset_scoped_max_total_allocated(std::size_t initial_value) override {
     std::scoped_lock lock(max_total_allocated_mutex);
-    scoped_allocated = 0;
+    scoped_allocated = initial_value;
     scoped_max_total_allocated = initial_value;
   }
 
-  std::size_t get_scoped_max_total_allocated() override { return scoped_max_total_allocated; }
+  std::size_t get_scoped_max_total_allocated() override {
+    std::scoped_lock lock(max_total_allocated_mutex);
+    return scoped_max_total_allocated;
+  }
 
 private:
   Upstream *const resource;
@@ -123,7 +126,6 @@ class tracking_resource_adaptor final : public base_tracking_resource_adaptor {
     if (result) {
       total_allocated += num_bytes;
       scoped_allocated += num_bytes;
-
       std::scoped_lock lock(max_total_allocated_mutex);
       max_total_allocated = std::max(total_allocated.load(), max_total_allocated);
       scoped_max_total_allocated = std::max(scoped_allocated.load(), scoped_max_total_allocated);
@@ -193,7 +195,7 @@ std::size_t get_scoped_max_total_allocated() {
  * @brief An RMM device memory resource adaptor that delegates to the wrapped resource
  * for most operations but will call Java to handle certain situations (e.g.: allocation failure).
  */
-class java_event_handler_memory_resource final : public device_memory_resource {
+class java_event_handler_memory_resource : public device_memory_resource {
 public:
   java_event_handler_memory_resource(JNIEnv *env, jobject jhandler, jlongArray jalloc_thresholds,
                                      jlongArray jdealloc_thresholds,
@@ -250,8 +252,6 @@ class java_event_handler_memory_resource final : public device_memory_resource {
 
 private:
   device_memory_resource *const resource;
-  JavaVM *jvm;
-  jobject handler_obj;
   jmethodID on_alloc_fail_method;
   bool use_old_alloc_fail_interface;
   jmethodID on_alloc_threshold_method;
@@ -309,6 +309,18 @@ class java_event_handler_memory_resource final : public device_memory_resource {
     }
   }
 
+  bool supports_get_mem_info() const noexcept override { return resource->supports_get_mem_info(); }
+
+  std::pair<size_t, size_t> do_get_mem_info(rmm::cuda_stream_view stream) const override {
+    return resource->get_mem_info(stream);
+  }
+
+  bool supports_streams() const noexcept override { return resource->supports_streams(); }
+
+protected:
+  JavaVM *jvm;
+  jobject handler_obj;
+
   void *do_allocate(std::size_t num_bytes, rmm::cuda_stream_view stream) override {
     std::size_t total_before;
     void *result;
@@ -348,20 +360,65 @@ class java_event_handler_memory_resource final : public device_memory_resource {
     check_for_threshold_callback(total_after, total_before, dealloc_thresholds,
                                  on_dealloc_threshold_method, "onDeallocThreshold", total_after);
   }
+};
 
-  bool supports_get_mem_info() const noexcept override { return resource->supports_get_mem_info(); }
+class java_debug_event_handler_memory_resource final : public java_event_handler_memory_resource {
+public:
+  java_debug_event_handler_memory_resource(JNIEnv *env, jobject jhandler,
+                                           jlongArray jalloc_thresholds,
+                                           jlongArray jdealloc_thresholds,
+                                           device_memory_resource *resource_to_wrap)
+      : java_event_handler_memory_resource(env, jhandler, jalloc_thresholds, jdealloc_thresholds,
+                                           resource_to_wrap) {
+    jclass cls = env->GetObjectClass(jhandler);
+    if (cls == nullptr) {
+      throw cudf::jni::jni_exception("class not found");
+    }
 
-  std::pair<size_t, size_t> do_get_mem_info(rmm::cuda_stream_view stream) const override {
-    return resource->get_mem_info(stream);
+    on_allocated_method = env->GetMethodID(cls, "onAllocated", "(J)V");
+    if (on_allocated_method == nullptr) {
+      throw cudf::jni::jni_exception("onAllocated method");
+    }
+
+    on_deallocated_method = env->GetMethodID(cls, "onDeallocated", "(J)V");
+    if (on_deallocated_method == nullptr) {
+      throw cudf::jni::jni_exception("onDeallocated method");
+    }
   }
 
-  bool supports_streams() const noexcept override { return resource->supports_streams(); }
+private:
+  jmethodID on_allocated_method;
+  jmethodID on_deallocated_method;
+
+  void on_allocated_callback(std::size_t num_bytes, rmm::cuda_stream_view stream) {
+    JNIEnv *env = cudf::jni::get_jni_env(jvm);
+    env->CallVoidMethod(handler_obj, on_allocated_method, num_bytes);
+    if (env->ExceptionCheck()) {
+      throw std::runtime_error("onAllocated handler threw an exception");
+    }
+  }
+
+  void on_deallocated_callback(void *p, std::size_t size, rmm::cuda_stream_view stream) {
+    JNIEnv *env = cudf::jni::get_jni_env(jvm);
+    env->CallVoidMethod(handler_obj, on_deallocated_method, size);
+  }
+
+  void *do_allocate(std::size_t num_bytes, rmm::cuda_stream_view stream) override {
+    void *result = java_event_handler_memory_resource::do_allocate(num_bytes, stream);
+    on_allocated_callback(num_bytes, stream);
+    return result;
+  }
+
+  void do_deallocate(void *p, std::size_t size, rmm::cuda_stream_view stream) override {
+    java_event_handler_memory_resource::do_deallocate(p, size, stream);
+    on_deallocated_callback(p, size, stream);
+  }
 };
 
 std::unique_ptr<java_event_handler_memory_resource> Java_memory_resource{};
 
 void set_java_device_memory_resource(JNIEnv *env, jobject handler_obj, jlongArray jalloc_thresholds,
-                                     jlongArray jdealloc_thresholds) {
+                                     jlongArray jdealloc_thresholds, jboolean enable_debug) {
   if (Java_memory_resource && handler_obj != nullptr) {
     JNI_THROW_NEW(env, RMM_EXCEPTION_CLASS, "Another event handler is already set", )
   }
@@ -378,8 +435,13 @@ void set_java_device_memory_resource(JNIEnv *env, jobject handler_obj, jlongArra
   }
   if (handler_obj != nullptr) {
     auto resource = rmm::mr::get_current_device_resource();
-    Java_memory_resource.reset(new java_event_handler_memory_resource(
-        env, handler_obj, jalloc_thresholds, jdealloc_thresholds, resource));
+    if (enable_debug) {
+      Java_memory_resource.reset(new java_debug_event_handler_memory_resource(
+          env, handler_obj, jalloc_thresholds, jdealloc_thresholds, resource));
+    } else {
+      Java_memory_resource.reset(new java_event_handler_memory_resource(
+          env, handler_obj, jalloc_thresholds, jdealloc_thresholds, resource));
+    }
     auto replaced_resource = rmm::mr::set_current_device_resource(Java_memory_resource.get());
     if (resource != replaced_resource) {
       rmm::mr::set_current_device_resource(replaced_resource);
@@ -493,7 +555,7 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_initializeInternal(JNIEnv *env, j
 JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_shutdownInternal(JNIEnv *env, jclass clazz) {
   try {
     cudf::jni::auto_set_device(env);
-    set_java_device_memory_resource(env, nullptr, nullptr, nullptr);
+    set_java_device_memory_resource(env, nullptr, nullptr, nullptr, false);
     // Instead of trying to undo all of the adaptors that we added in reverse order
     // we just reset the base adaptor so the others will not be called any more
     // and then clean them up in really any order.  There should be no interaction with
@@ -517,7 +579,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Rmm_getMaximumTotalBytesAllocated(JN
 }
 
 JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_resetScopedMaximumBytesAllocatedInternal(
-    JNIEnv *env, jclass, long initialValue) {
+    JNIEnv *env, jclass, jlong initialValue) {
   reset_scoped_max_total_allocated(initialValue);
 }
 
@@ -562,9 +624,10 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_freeDeviceBuffer(JNIEnv *env, jcl
 
 JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_setEventHandlerInternal(
     JNIEnv *env, jclass, jobject handler_obj, jlongArray jalloc_thresholds,
-    jlongArray jdealloc_thresholds) {
+    jlongArray jdealloc_thresholds, jboolean enable_debug) {
   try {
-    set_java_device_memory_resource(env, handler_obj, jalloc_thresholds, jdealloc_thresholds);
+    set_java_device_memory_resource(env, handler_obj, jalloc_thresholds, jdealloc_thresholds,
+                                    enable_debug);
   }
   CATCH_STD(env, )
 }
diff --git a/java/src/test/java/ai/rapids/cudf/RmmTest.java b/java/src/test/java/ai/rapids/cudf/RmmTest.java
index 18ff5f4081e..c081f51c9f2 100644
--- a/java/src/test/java/ai/rapids/cudf/RmmTest.java
+++ b/java/src/test/java/ai/rapids/cudf/RmmTest.java
@@ -115,7 +115,7 @@ public void testScopedMaxOutstanding(int rmmAllocMode) {
     try(DeviceMemoryBuffer ignored3 = Rmm.alloc(1024)) {
       Rmm.resetScopedMaximumBytesAllocated(1024);
       try (DeviceMemoryBuffer ignored4 = Rmm.alloc(20480)) {
-        assertEquals(20480, Rmm.getScopedMaximumBytesAllocated());
+        assertEquals(21504, Rmm.getScopedMaximumBytesAllocated());
         assertEquals(21504, Rmm.getMaximumTotalBytesAllocated());
       }
     }
@@ -157,6 +157,8 @@ public void testEventHandler(int rmmAllocMode) {
     AtomicInteger invokedCount = new AtomicInteger();
     AtomicLong amountRequested = new AtomicLong();
     AtomicInteger timesRetried = new AtomicInteger();
+    AtomicLong totalAllocated = new AtomicLong();
+    AtomicLong totalDeallocated = new AtomicLong();
 
     RmmEventHandler handler = new BaseRmmEventHandler() {
       @Override
@@ -166,6 +168,16 @@ public boolean onAllocFailure(long sizeRequested, int retryCount) {
         amountRequested.set(sizeRequested);
         return count != 3;
       }
+
+      @Override
+      public void onAllocated(long sizeAllocated) {
+        totalAllocated.addAndGet(sizeAllocated);
+      }
+
+      @Override
+      public void onDeallocated(long sizeDeallocated) {
+        totalDeallocated.addAndGet(sizeDeallocated);
+      }
     };
 
     Rmm.initialize(rmmAllocMode, Rmm.logToStderr(), 512 * 1024 * 1024);
@@ -175,6 +187,10 @@ public boolean onAllocFailure(long sizeRequested, int retryCount) {
     assertTrue(addr.address != 0);
     assertEquals(0, invokedCount.get());
 
+    // by default, we don't get callbacks on allocated or deallocated
+    assertEquals(0, totalAllocated.get());
+    assertEquals(0, totalDeallocated.get());
+
     // Try to allocate too much
     long requested = TOO_MUCH_MEMORY;
     try {
@@ -192,6 +208,14 @@ public boolean onAllocFailure(long sizeRequested, int retryCount) {
     requested = 8192;
     addr = Rmm.alloc(requested);
     addr.close();
+
+    // test the debug event handler
+    Rmm.clearEventHandler();
+    Rmm.setEventHandler(handler, /*enableDebug*/ true);
+    addr = Rmm.alloc(1024);
+    addr.close();
+    assertEquals(1024, totalAllocated.get());
+    assertEquals(1024, totalDeallocated.get());
   }
 
   @Test

From 9df2eba7b13d4703c502c75eb36739b5193091ce Mon Sep 17 00:00:00 2001
From: VamsiTallam95 <90267547+VamsiTallam95@users.noreply.github.com>
Date: Fri, 4 Nov 2022 18:01:23 -0500
Subject: [PATCH 128/202] Adding feature Truncate to DataFrame and Series
 (#11435)

This PR closes #9629 by adding a `truncate` method to DataFrame and Series. The method truncates a DataFrame or Series before and after some index value. If the index being truncated contains only datetime values, `before` and `after` may be specified as strings instead of Timestamps.

Authors:
  - https://github.com/VamsiTallam95
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/11435
---
 python/cudf/cudf/core/indexed_frame.py   | 200 +++++++++++++++++++++++
 python/cudf/cudf/core/series.py          |   4 +
 python/cudf/cudf/tests/test_dataframe.py |  56 +++++++
 python/cudf/cudf/tests/test_series.py    |  41 +++++
 4 files changed, 301 insertions(+)

diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 57469c0ff72..49f7101183e 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -1045,6 +1045,206 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None):
             zip(self._column_names, data_columns), self._index
         )
 
+    @_cudf_nvtx_annotate
+    def truncate(self, before=None, after=None, axis=0, copy=True):
+        """
+        Truncate a Series or DataFrame before and after some index value.
+
+        This is a useful shorthand for boolean indexing based on index
+        values above or below certain thresholds.
+
+        Parameters
+        ----------
+        before : date, str, int
+            Truncate all rows before this index value.
+        after : date, str, int
+            Truncate all rows after this index value.
+        axis : {0 or 'index', 1 or 'columns'}, optional
+            Axis to truncate. Truncates the index (rows) by default.
+        copy : bool, default True
+            Return a copy of the truncated section.
+
+        Returns
+        -------
+            The truncated Series or DataFrame.
+
+        Notes
+        -----
+        If the index being truncated contains only datetime values,
+        `before` and `after` may be specified as strings instead of
+        Timestamps.
+
+        .. pandas-compat::
+            **DataFrame.truncate, Series.truncate**
+
+            The ``copy`` parameter is only present for API compatibility, but
+            ``copy=False`` is not supported. This method always generates a
+            copy.
+
+        Examples
+        --------
+        **Series**
+
+        >>> import cudf
+        >>> cs1 = cudf.Series([1, 2, 3, 4])
+        >>> cs1
+        0    1
+        1    2
+        2    3
+        3    4
+        dtype: int64
+
+        >>> cs1.truncate(before=1, after=2)
+        1    2
+        2    3
+        dtype: int64
+
+        >>> import cudf
+        >>> dates = cudf.date_range(
+        ...     '2021-01-01 23:45:00', '2021-01-01 23:46:00', freq='s'
+        ... )
+        >>> cs2 = cudf.Series(range(len(dates)), index=dates)
+        >>> cs2
+        2021-01-01 23:45:00     0
+        2021-01-01 23:45:01     1
+        2021-01-01 23:45:02     2
+        2021-01-01 23:45:03     3
+        2021-01-01 23:45:04     4
+        2021-01-01 23:45:05     5
+        2021-01-01 23:45:06     6
+        2021-01-01 23:45:07     7
+        2021-01-01 23:45:08     8
+        2021-01-01 23:45:09     9
+        2021-01-01 23:45:10    10
+        2021-01-01 23:45:11    11
+        2021-01-01 23:45:12    12
+        2021-01-01 23:45:13    13
+        2021-01-01 23:45:14    14
+        2021-01-01 23:45:15    15
+        2021-01-01 23:45:16    16
+        2021-01-01 23:45:17    17
+        2021-01-01 23:45:18    18
+        2021-01-01 23:45:19    19
+        2021-01-01 23:45:20    20
+        2021-01-01 23:45:21    21
+        2021-01-01 23:45:22    22
+        2021-01-01 23:45:23    23
+        2021-01-01 23:45:24    24
+        ...
+        2021-01-01 23:45:56    56
+        2021-01-01 23:45:57    57
+        2021-01-01 23:45:58    58
+        2021-01-01 23:45:59    59
+        dtype: int64
+
+
+        >>> cs2.truncate(
+        ...     before="2021-01-01 23:45:18", after="2021-01-01 23:45:27"
+        ... )
+        2021-01-01 23:45:18    18
+        2021-01-01 23:45:19    19
+        2021-01-01 23:45:20    20
+        2021-01-01 23:45:21    21
+        2021-01-01 23:45:22    22
+        2021-01-01 23:45:23    23
+        2021-01-01 23:45:24    24
+        2021-01-01 23:45:25    25
+        2021-01-01 23:45:26    26
+        2021-01-01 23:45:27    27
+        dtype: int64
+
+        >>> cs3 = cudf.Series({'A': 1, 'B': 2, 'C': 3, 'D': 4})
+        >>> cs3
+        A    1
+        B    2
+        C    3
+        D    4
+        dtype: int64
+
+        >>> cs3.truncate(before='B', after='C')
+        B    2
+        C    3
+        dtype: int64
+
+        **DataFrame**
+
+        >>> df = cudf.DataFrame({
+        ...     'A': ['a', 'b', 'c', 'd', 'e'],
+        ...     'B': ['f', 'g', 'h', 'i', 'j'],
+        ...     'C': ['k', 'l', 'm', 'n', 'o']
+        ... }, index=[1, 2, 3, 4, 5])
+        >>> df
+           A  B  C
+        1  a  f  k
+        2  b  g  l
+        3  c  h  m
+        4  d  i  n
+        5  e  j  o
+
+        >>> df.truncate(before=2, after=4)
+           A  B  C
+        2  b  g  l
+        3  c  h  m
+        4  d  i  n
+
+        >>> df.truncate(before="A", after="B", axis="columns")
+           A  B
+        1  a  f
+        2  b  g
+        3  c  h
+        4  d  i
+        5  e  j
+
+        >>> import cudf
+        >>> dates = cudf.date_range(
+        ...     '2021-01-01 23:45:00', '2021-01-01 23:46:00', freq='s'
+        ... )
+        >>> df2 = cudf.DataFrame(data={'A': 1, 'B': 2}, index=dates)
+        >>> df2.head()
+                             A  B
+        2021-01-01 23:45:00  1  2
+        2021-01-01 23:45:01  1  2
+        2021-01-01 23:45:02  1  2
+        2021-01-01 23:45:03  1  2
+        2021-01-01 23:45:04  1  2
+
+        >>> df2.truncate(
+        ...     before="2021-01-01 23:45:18", after="2021-01-01 23:45:27"
+        ... )
+                             A  B
+        2021-01-01 23:45:18  1  2
+        2021-01-01 23:45:19  1  2
+        2021-01-01 23:45:20  1  2
+        2021-01-01 23:45:21  1  2
+        2021-01-01 23:45:22  1  2
+        2021-01-01 23:45:23  1  2
+        2021-01-01 23:45:24  1  2
+        2021-01-01 23:45:25  1  2
+        2021-01-01 23:45:26  1  2
+        2021-01-01 23:45:27  1  2
+        """
+        if not copy:
+            raise ValueError("Truncating with copy=False is not supported.")
+        axis = self._get_axis_from_axis_arg(axis)
+        ax = self._index if axis == 0 else self._data.to_pandas_index()
+
+        if not ax.is_monotonic_increasing and not ax.is_monotonic_decreasing:
+            raise ValueError("truncate requires a sorted index")
+
+        if type(ax) is cudf.core.index.DatetimeIndex:
+            before = pd.to_datetime(before)
+            after = pd.to_datetime(after)
+
+        if before is not None and after is not None and before > after:
+            raise ValueError(f"Truncate: {after} must be after {before}")
+
+        if len(ax) > 1 and ax.is_monotonic_decreasing and ax.nunique() > 1:
+            before, after = after, before
+
+        slicer = [slice(None, None)] * self.ndim
+        slicer[axis] = slice(before, after)
+        return self.loc[tuple(slicer)].copy()
+
     @cached_property
     def loc(self):
         """Select rows and columns by label or boolean mask.
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 70e8c3d6860..f54f4b385e6 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -283,6 +283,10 @@ def __setitem__(self, key, value):
         self._frame.iloc[key] = value
 
     def _loc_to_iloc(self, arg):
+        if isinstance(arg, tuple) and arg and isinstance(arg[0], slice):
+            if len(arg) > 1:
+                raise IndexError("Too many Indexers")
+            arg = arg[0]
         if _is_scalar_or_zero_d_array(arg):
             if not _is_non_decimal_numeric_dtype(self._frame.index.dtype):
                 # TODO: switch to cudf.utils.dtypes.is_integer(arg)
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 1fcfbe5fc91..58bee95326f 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -289,6 +289,62 @@ def test_axes(data):
         assert_eq(e, a)
 
 
+def test_dataframe_truncate_axis_0():
+    df = cudf.DataFrame(
+        {
+            "A": ["a", "b", "c", "d", "e"],
+            "B": ["f", "g", "h", "i", "j"],
+            "C": ["k", "l", "m", "n", "o"],
+        },
+        index=[1, 2, 3, 4, 5],
+    )
+    pdf = df.to_pandas()
+
+    expected = pdf.truncate(before=2, after=4, axis="index")
+    actual = df.truncate(before=2, after=4, axis="index")
+    assert_eq(actual, expected)
+
+    expected = pdf.truncate(before=1, after=4, axis=0)
+    actual = df.truncate(before=1, after=4, axis=0)
+    assert_eq(expected, actual)
+
+
+def test_dataframe_truncate_axis_1():
+    df = cudf.DataFrame(
+        {
+            "A": ["a", "b", "c", "d", "e"],
+            "B": ["f", "g", "h", "i", "j"],
+            "C": ["k", "l", "m", "n", "o"],
+        },
+        index=[1, 2, 3, 4, 5],
+    )
+    pdf = df.to_pandas()
+
+    expected = pdf.truncate(before="A", after="B", axis="columns")
+    actual = df.truncate(before="A", after="B", axis="columns")
+    assert_eq(actual, expected)
+
+    expected = pdf.truncate(before="A", after="B", axis=1)
+    actual = df.truncate(before="A", after="B", axis=1)
+    assert_eq(actual, expected)
+
+
+def test_dataframe_truncate_datetimeindex():
+    dates = cudf.date_range(
+        "2021-01-01 23:45:00", "2021-01-01 23:46:00", freq="s"
+    )
+    df = cudf.DataFrame(data={"A": 1, "B": 2}, index=dates)
+    pdf = df.to_pandas()
+    expected = pdf.truncate(
+        before="2021-01-01 23:45:18", after="2021-01-01 23:45:27"
+    )
+    actual = df.truncate(
+        before="2021-01-01 23:45:18", after="2021-01-01 23:45:27"
+    )
+
+    assert_eq(actual, expected)
+
+
 def test_series_init_none():
 
     # test for creating empty series
diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py
index c0b99f56238..d5af2899bb0 100644
--- a/python/cudf/cudf/tests/test_series.py
+++ b/python/cudf/cudf/tests/test_series.py
@@ -1614,6 +1614,47 @@ def test_axes(data):
         assert_eq(e, a)
 
 
+def test_series_truncate():
+    csr = cudf.Series([1, 2, 3, 4])
+    psr = csr.to_pandas()
+
+    assert_eq(csr.truncate(), psr.truncate())
+    assert_eq(csr.truncate(1, 2), psr.truncate(1, 2))
+    assert_eq(csr.truncate(before=1, after=2), psr.truncate(before=1, after=2))
+
+
+def test_series_truncate_errors():
+    csr = cudf.Series([1, 2, 3, 4])
+    with pytest.raises(ValueError):
+        csr.truncate(axis=1)
+    with pytest.raises(ValueError):
+        csr.truncate(copy=False)
+
+    csr.index = [3, 2, 1, 6]
+    psr = csr.to_pandas()
+    assert_exceptions_equal(
+        lfunc=csr.truncate,
+        rfunc=psr.truncate,
+    )
+
+
+def test_series_truncate_datetimeindex():
+    dates = cudf.date_range(
+        "2021-01-01 23:45:00", "2021-01-02 23:46:00", freq="s"
+    )
+    csr = cudf.Series(range(len(dates)), index=dates)
+    psr = csr.to_pandas()
+
+    assert_eq(
+        csr.truncate(
+            before="2021-01-01 23:45:18", after="2021-01-01 23:45:27"
+        ),
+        psr.truncate(
+            before="2021-01-01 23:45:18", after="2021-01-01 23:45:27"
+        ),
+    )
+
+
 @pytest.mark.parametrize(
     "data",
     [

From 11b875bbd97053c29e7bbd4e9d2d1e528f9f221b Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <wence@gmx.li>
Date: Fri, 4 Nov 2022 23:57:07 +0000
Subject: [PATCH 129/202] Fix type casting in Series.__setitem__ (#11904)

To mimic pandas, we must upcast a column to the numpy result_type of the column itself and the input value dtype. This previously occurred in all relevant cases except when the index provided to __setitem__ was a single integer (originally introduced in #2442). Closes #11901.

Authors:
  - Lawrence Mitchell (https://github.com/wence-)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/11904
---
 python/cudf/cudf/core/column/column.py |  2 ++
 python/cudf/cudf/core/scalar.py        |  2 ++
 python/cudf/cudf/core/series.py        | 18 ++++++-----
 python/cudf/cudf/tests/test_setitem.py | 45 ++++++++++++++++++++++++++
 4 files changed, 59 insertions(+), 8 deletions(-)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 22f8d27f9e8..6c17b492f8a 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -847,6 +847,8 @@ def can_cast_safely(self, to_dtype: Dtype) -> bool:
         raise NotImplementedError()
 
     def astype(self, dtype: Dtype, **kwargs) -> ColumnBase:
+        if self.dtype == dtype:
+            return self
         if is_categorical_dtype(dtype):
             return self.as_categorical_column(dtype, **kwargs)
 
diff --git a/python/cudf/cudf/core/scalar.py b/python/cudf/cudf/core/scalar.py
index e05e8662fe4..e516177ad29 100644
--- a/python/cudf/cudf/core/scalar.py
+++ b/python/cudf/cudf/core/scalar.py
@@ -392,4 +392,6 @@ def _dispatch_scalar_unaop(self, op):
         return getattr(self.value, op)()
 
     def astype(self, dtype):
+        if self.dtype == dtype:
+            return self
         return Scalar(self.value, dtype)
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index f54f4b385e6..8c30ae258db 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -35,6 +35,7 @@
     is_integer_dtype,
     is_list_dtype,
     is_scalar,
+    is_string_dtype,
     is_struct_dtype,
 )
 from cudf.core.abc import Serializable
@@ -214,19 +215,20 @@ def __setitem__(self, key, value):
             value = column.as_column(value)
 
         if (
-            not isinstance(
-                self._frame._column.dtype,
-                (cudf.core.dtypes.DecimalDtype, cudf.CategoricalDtype),
+            (
+                _is_non_decimal_numeric_dtype(self._frame._column.dtype)
+                or is_string_dtype(self._frame._column.dtype)
             )
             and hasattr(value, "dtype")
             and _is_non_decimal_numeric_dtype(value.dtype)
         ):
             # normalize types if necessary:
-            if not is_integer(key):
-                to_dtype = np.result_type(
-                    value.dtype, self._frame._column.dtype
-                )
-                value = value.astype(to_dtype)
+            # In contrast to Column.__setitem__ (which downcasts the value to
+            # the dtype of the column) here we upcast the series to the
+            # larger data type mimicking pandas
+            to_dtype = np.result_type(value.dtype, self._frame._column.dtype)
+            value = value.astype(to_dtype)
+            if to_dtype != self._frame._column.dtype:
                 self._frame._column._mimic_inplace(
                     self._frame._column.astype(to_dtype), inplace=True
                 )
diff --git a/python/cudf/cudf/tests/test_setitem.py b/python/cudf/cudf/tests/test_setitem.py
index 13b342e6c3b..ac9dbecda65 100644
--- a/python/cudf/cudf/tests/test_setitem.py
+++ b/python/cudf/cudf/tests/test_setitem.py
@@ -297,3 +297,48 @@ def test_series_slice_setitem_struct():
     actual[0:3] = cudf.Scalar({"a": {"b": 5050}, "b": 101})
 
     assert_eq(actual, expected)
+
+
+@pytest.mark.parametrize("dtype", [np.int32, np.int64, np.float32, np.float64])
+@pytest.mark.parametrize("indices", [0, [1, 2]])
+def test_series_setitem_upcasting(dtype, indices):
+    sr = pd.Series([0, 0, 0], dtype=dtype)
+    cr = cudf.from_pandas(sr)
+    assert_eq(sr, cr)
+    # Must be a non-integral floating point value that can't be losslessly
+    # converted to float32, otherwise pandas will try and match the source
+    # column dtype.
+    new_value = np.float64(np.pi)
+    col_ref = cr._column
+    sr[indices] = new_value
+    cr[indices] = new_value
+    if PANDAS_GE_150:
+        assert_eq(sr, cr)
+    else:
+        # pandas bug, incorrectly fails to upcast from float32 to float64
+        assert_eq(sr.values, cr.values)
+    if dtype == np.float64:
+        # no-op type cast should not modify backing column
+        assert col_ref == cr._column
+
+
+# TODO: these two tests could perhaps be changed once specifics of
+# pandas compat wrt upcasting are decided on; this is just baking in
+# status-quo.
+def test_series_setitem_upcasting_string_column():
+    sr = pd.Series([0, 0, 0], dtype=str)
+    cr = cudf.from_pandas(sr)
+    new_value = np.float64(10.5)
+    sr[0] = str(new_value)
+    cr[0] = new_value
+    assert_eq(sr, cr)
+
+
+def test_series_setitem_upcasting_string_value():
+    sr = cudf.Series([0, 0, 0], dtype=int)
+    # This is a distinction with pandas, which lets you instead make an
+    # object column with ["10", 0, 0]
+    sr[0] = "10"
+    assert_eq(pd.Series([10, 0, 0], dtype=int), sr)
+    with pytest.raises(ValueError):
+        sr[0] = "non-integer"

From 52dbb63dcf3bf4072705df78915c658491e01b51 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Mon, 7 Nov 2022 10:31:54 -0600
Subject: [PATCH 130/202] Fix link to c++ developer guide from
 `CONTRIBUTING.md` (#12084)

I noticed this link was broken while poking around; I think this should fix it.

Authors:
  - https://github.com/brandon-b-miller

Approvers:
  - David Wendt (https://github.com/davidwendt)

URL: https://github.com/rapidsai/cudf/pull/12084
---
 CONTRIBUTING.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 6eb621abcc3..9c432b6cd4c 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -382,7 +382,7 @@ You can skip these checks with `git commit --no-verify` or with the short versio
 
 ## Developer Guidelines
 
-The [C++ Developer Guide](cpp/docs/DEVELOPER_GUIDE.md) includes details on contributing to libcudf C++ code.
+The [C++ Developer Guide](cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md) includes details on contributing to libcudf C++ code.
 
 The [Python Developer Guide](https://docs.rapids.ai/api/cudf/stable/developer_guide/index.html) includes details on contributing to cuDF Python code.
 

From 262631bf6ecd0af2ab6f6daa7c62c72c11a9c22a Mon Sep 17 00:00:00 2001
From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com>
Date: Tue, 8 Nov 2022 01:24:14 +0530
Subject: [PATCH 131/202] Fix ingest_raw_data performance issue in Nested JSON
 reader due to RVO (#12070)

The issue is that `json::experimental::ingest_raw_input` took double the time of `json::ingest_raw_input` for the same data.

After replacing the ternary operator with `if`/`else`, the runtime for a 500 MB file is the same as that of `json::ingest_raw_input`.
I suspect that RVO (copy elision) is skipped when the ternary operator is used.

Authors:
  - Karthikeyan (https://github.com/karthikeyann)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Elias Stehle (https://github.com/elstehle)
  - MithunR (https://github.com/mythrocks)

URL: https://github.com/rapidsai/cudf/pull/12070
---
 cpp/src/io/json/experimental/read_json.cpp | 46 +++++++++++++++++-----
 cpp/src/io/json/reader_impl.cu             |  4 ++
 2 files changed, 40 insertions(+), 10 deletions(-)

diff --git a/cpp/src/io/json/experimental/read_json.cpp b/cpp/src/io/json/experimental/read_json.cpp
index c0eaa43e68f..b0b7d5baa0f 100644
--- a/cpp/src/io/json/experimental/read_json.cpp
+++ b/cpp/src/io/json/experimental/read_json.cpp
@@ -19,27 +19,49 @@
 #include <io/comp/io_uncomp.hpp>
 #include <io/json/nested_json.hpp>
 
+#include <cudf/detail/nvtx/ranges.hpp>
 #include <cudf/utilities/error.hpp>
 
 #include <numeric>
 
 namespace cudf::io::detail::json::experimental {
 
-std::vector<uint8_t> ingest_raw_input(host_span<std::unique_ptr<datasource>> sources,
-                                      compression_type compression)
+size_t sources_size(host_span<std::unique_ptr<datasource>> const sources,
+                    size_t range_offset,
+                    size_t range_size)
 {
-  auto const total_source_size =
-    std::accumulate(sources.begin(), sources.end(), 0ul, [](size_t sum, auto& source) {
-      return sum + source->size();
-    });
-  auto buffer = std::vector<uint8_t>(total_source_size);
+  return std::accumulate(sources.begin(), sources.end(), 0ul, [=](size_t sum, auto& source) {
+    auto const size = source->size();
+    // TODO take care of 0, 0, or *, 0 case.
+    return sum +
+           (range_size == 0 or range_offset + range_size > size ? size - range_offset : range_size);
+  });
+}
+
+std::vector<uint8_t> ingest_raw_input(host_span<std::unique_ptr<datasource>> const& sources,
+                                      compression_type compression,
+                                      size_t range_offset,
+                                      size_t range_size)
+{
+  CUDF_FUNC_RANGE();
+  // Iterate through the user defined sources and read the contents into the local buffer
+  auto const total_source_size = sources_size(sources, range_offset, range_size);
+  auto buffer                  = std::vector<uint8_t>(total_source_size);
 
   size_t bytes_read = 0;
   for (const auto& source : sources) {
-    bytes_read += source->host_read(0, source->size(), buffer.data() + bytes_read);
+    if (!source->is_empty()) {
+      auto data_size   = (range_size != 0) ? range_size : source->size();
+      auto destination = buffer.data() + bytes_read;
+      bytes_read += source->host_read(range_offset, data_size, destination);
+    }
   }
 
-  return (compression == compression_type::NONE) ? buffer : decompress(compression, buffer);
+  if (compression == compression_type::NONE) {
+    return buffer;
+  } else {
+    return decompress(compression, buffer);
+  }
 }
 
 table_with_metadata read_json(host_span<std::unique_ptr<datasource>> sources,
@@ -47,10 +69,14 @@ table_with_metadata read_json(host_span<std::unique_ptr<datasource>> sources,
                               rmm::cuda_stream_view stream,
                               rmm::mr::device_memory_resource* mr)
 {
+  CUDF_FUNC_RANGE();
   CUDF_EXPECTS(reader_opts.get_byte_range_offset() == 0 and reader_opts.get_byte_range_size() == 0,
                "specifying a byte range is not yet supported");
 
-  auto const buffer = ingest_raw_input(sources, reader_opts.get_compression());
+  auto const buffer = ingest_raw_input(sources,
+                                       reader_opts.get_compression(),
+                                       reader_opts.get_byte_range_offset(),
+                                       reader_opts.get_byte_range_size());
   auto data = host_span<char const>(reinterpret_cast<char const*>(buffer.data()), buffer.size());
 
   try {
diff --git a/cpp/src/io/json/reader_impl.cu b/cpp/src/io/json/reader_impl.cu
index 48b2af81fcd..4bbe91b61d2 100644
--- a/cpp/src/io/json/reader_impl.cu
+++ b/cpp/src/io/json/reader_impl.cu
@@ -26,6 +26,7 @@
 #include <io/utilities/type_conversion.hpp>
 
 #include <cudf/column/column_factories.hpp>
+#include <cudf/detail/nvtx/ranges.hpp>
 #include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/detail/utilities/visitor_overload.hpp>
 #include <cudf/groupby.hpp>
@@ -222,6 +223,7 @@ std::vector<uint8_t> ingest_raw_input(std::vector<std::unique_ptr<datasource>> c
                                       size_t range_size,
                                       size_t range_size_padded)
 {
+  CUDF_FUNC_RANGE();
   // Iterate through the user defined sources and read the contents into the local buffer
   size_t total_source_size = 0;
   for (const auto& source : sources) {
@@ -313,6 +315,7 @@ rmm::device_uvector<char> upload_data_to_device(json_reader_options const& reade
                                                 rmm::device_uvector<uint64_t>& rec_starts,
                                                 rmm::cuda_stream_view stream)
 {
+  CUDF_FUNC_RANGE();
   size_t end_offset = h_data.size();
 
   // Trim lines that are outside range
@@ -592,6 +595,7 @@ table_with_metadata read_json(std::vector<std::unique_ptr<datasource>>& sources,
                               rmm::cuda_stream_view stream,
                               rmm::mr::device_memory_resource* mr)
 {
+  CUDF_FUNC_RANGE();
   if (reader_opts.is_enabled_experimental()) {
     return experimental::read_json(sources, reader_opts, stream, mr);
   }

From 17b6b2e4f3448be183f026d1cc437f5331c6077c Mon Sep 17 00:00:00 2001
From: Charles Blackmon-Luca <20627856+charlesbluca@users.noreply.github.com>
Date: Mon, 7 Nov 2022 14:58:13 -0500
Subject: [PATCH 132/202] Add checks for HLG layers in dask-cudf groupby tests
 (#10853)

This PR adds a helper function, `check_groupby_result`, to dask-cudf's groupby tests; it is used in the basic tests to ensure that we are using dask-cudf's `groupby_agg` function to compute the result as expected.

I also expanded `test_groupby_agg` to test all supported aggregations, and removed tests that were made superfluous by this change.

Authors:
  - Charles Blackmon-Luca (https://github.com/charlesbluca)

Approvers:
  - Mads R. B. Kristensen (https://github.com/madsbk)
  - Lawrence Mitchell (https://github.com/wence-)

URL: https://github.com/rapidsai/cudf/pull/10853
---
 python/dask_cudf/dask_cudf/groupby.py         |  76 +++++-----
 .../dask_cudf/dask_cudf/tests/test_groupby.py | 136 +++++++++---------
 2 files changed, 101 insertions(+), 111 deletions(-)

diff --git a/python/dask_cudf/dask_cudf/groupby.py b/python/dask_cudf/dask_cudf/groupby.py
index f5258e6cab8..54f8958c9eb 100644
--- a/python/dask_cudf/dask_cudf/groupby.py
+++ b/python/dask_cudf/dask_cudf/groupby.py
@@ -1,5 +1,6 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
+from functools import wraps
 from typing import Set
 
 import numpy as np
@@ -16,12 +17,8 @@
 import cudf
 from cudf.utils.utils import _dask_cudf_nvtx_annotate
 
-CUMULATIVE_AGGS = (
-    "cumsum",
-    "cumcount",
-)
-
-AGGS = (
+# aggregations that are dask-cudf optimized
+OPTIMIZED_AGGS = (
     "count",
     "mean",
     "std",
@@ -34,19 +31,18 @@
     "last",
 )
 
-SUPPORTED_AGGS = (*AGGS, *CUMULATIVE_AGGS)
-
 
-def _check_groupby_supported(func):
+def _check_groupby_optimized(func):
     """
     Decorator for dask-cudf's groupby methods that returns the dask-cudf
-    method if the groupby object is supported, otherwise reverting to the
-    upstream Dask method
+    optimized method if the groupby object is supported, otherwise
+    reverting to the upstream Dask method
     """
 
+    @wraps(func)
     def wrapper(*args, **kwargs):
         gb = args[0]
-        if _groupby_supported(gb):
+        if _groupby_optimized(gb):
             return func(*args, **kwargs)
         # note that we use upstream Dask's default kwargs for this call if
         # none are specified; this shouldn't be an issue as those defaults are
@@ -94,7 +90,7 @@ def _make_groupby_method_aggs(self, agg_name):
         return {c: agg_name for c in self.obj.columns if c != self.by}
 
     @_dask_cudf_nvtx_annotate
-    @_check_groupby_supported
+    @_check_groupby_optimized
     def count(self, split_every=None, split_out=1):
         return groupby_agg(
             self.obj,
@@ -109,7 +105,7 @@ def count(self, split_every=None, split_out=1):
         )
 
     @_dask_cudf_nvtx_annotate
-    @_check_groupby_supported
+    @_check_groupby_optimized
     def mean(self, split_every=None, split_out=1):
         return groupby_agg(
             self.obj,
@@ -124,7 +120,7 @@ def mean(self, split_every=None, split_out=1):
         )
 
     @_dask_cudf_nvtx_annotate
-    @_check_groupby_supported
+    @_check_groupby_optimized
     def std(self, split_every=None, split_out=1):
         return groupby_agg(
             self.obj,
@@ -139,7 +135,7 @@ def std(self, split_every=None, split_out=1):
         )
 
     @_dask_cudf_nvtx_annotate
-    @_check_groupby_supported
+    @_check_groupby_optimized
     def var(self, split_every=None, split_out=1):
         return groupby_agg(
             self.obj,
@@ -154,7 +150,7 @@ def var(self, split_every=None, split_out=1):
         )
 
     @_dask_cudf_nvtx_annotate
-    @_check_groupby_supported
+    @_check_groupby_optimized
     def sum(self, split_every=None, split_out=1):
         return groupby_agg(
             self.obj,
@@ -169,7 +165,7 @@ def sum(self, split_every=None, split_out=1):
         )
 
     @_dask_cudf_nvtx_annotate
-    @_check_groupby_supported
+    @_check_groupby_optimized
     def min(self, split_every=None, split_out=1):
         return groupby_agg(
             self.obj,
@@ -184,7 +180,7 @@ def min(self, split_every=None, split_out=1):
         )
 
     @_dask_cudf_nvtx_annotate
-    @_check_groupby_supported
+    @_check_groupby_optimized
     def max(self, split_every=None, split_out=1):
         return groupby_agg(
             self.obj,
@@ -199,7 +195,7 @@ def max(self, split_every=None, split_out=1):
         )
 
     @_dask_cudf_nvtx_annotate
-    @_check_groupby_supported
+    @_check_groupby_optimized
     def collect(self, split_every=None, split_out=1):
         return groupby_agg(
             self.obj,
@@ -214,7 +210,7 @@ def collect(self, split_every=None, split_out=1):
         )
 
     @_dask_cudf_nvtx_annotate
-    @_check_groupby_supported
+    @_check_groupby_optimized
     def first(self, split_every=None, split_out=1):
         return groupby_agg(
             self.obj,
@@ -229,7 +225,7 @@ def first(self, split_every=None, split_out=1):
         )
 
     @_dask_cudf_nvtx_annotate
-    @_check_groupby_supported
+    @_check_groupby_optimized
     def last(self, split_every=None, split_out=1):
         return groupby_agg(
             self.obj,
@@ -250,7 +246,7 @@ def aggregate(self, arg, split_every=None, split_out=1, shuffle=None):
 
         arg = _redirect_aggs(arg)
 
-        if _groupby_supported(self) and _aggs_supported(arg, SUPPORTED_AGGS):
+        if _groupby_optimized(self) and _aggs_optimized(arg, OPTIMIZED_AGGS):
             if isinstance(self._meta.grouping.keys, cudf.MultiIndex):
                 keys = self._meta.grouping.keys.names
             else:
@@ -287,7 +283,7 @@ def __init__(self, *args, sort=None, **kwargs):
         super().__init__(*args, sort=sort, **kwargs)
 
     @_dask_cudf_nvtx_annotate
-    @_check_groupby_supported
+    @_check_groupby_optimized
     def count(self, split_every=None, split_out=1):
         return groupby_agg(
             self.obj,
@@ -302,7 +298,7 @@ def count(self, split_every=None, split_out=1):
         )[self._slice]
 
     @_dask_cudf_nvtx_annotate
-    @_check_groupby_supported
+    @_check_groupby_optimized
     def mean(self, split_every=None, split_out=1):
         return groupby_agg(
             self.obj,
@@ -317,7 +313,7 @@ def mean(self, split_every=None, split_out=1):
         )[self._slice]
 
     @_dask_cudf_nvtx_annotate
-    @_check_groupby_supported
+    @_check_groupby_optimized
     def std(self, split_every=None, split_out=1):
         return groupby_agg(
             self.obj,
@@ -332,7 +328,7 @@ def std(self, split_every=None, split_out=1):
         )[self._slice]
 
     @_dask_cudf_nvtx_annotate
-    @_check_groupby_supported
+    @_check_groupby_optimized
     def var(self, split_every=None, split_out=1):
         return groupby_agg(
             self.obj,
@@ -347,7 +343,7 @@ def var(self, split_every=None, split_out=1):
         )[self._slice]
 
     @_dask_cudf_nvtx_annotate
-    @_check_groupby_supported
+    @_check_groupby_optimized
     def sum(self, split_every=None, split_out=1):
         return groupby_agg(
             self.obj,
@@ -362,7 +358,7 @@ def sum(self, split_every=None, split_out=1):
         )[self._slice]
 
     @_dask_cudf_nvtx_annotate
-    @_check_groupby_supported
+    @_check_groupby_optimized
     def min(self, split_every=None, split_out=1):
         return groupby_agg(
             self.obj,
@@ -377,7 +373,7 @@ def min(self, split_every=None, split_out=1):
         )[self._slice]
 
     @_dask_cudf_nvtx_annotate
-    @_check_groupby_supported
+    @_check_groupby_optimized
     def max(self, split_every=None, split_out=1):
         return groupby_agg(
             self.obj,
@@ -392,7 +388,7 @@ def max(self, split_every=None, split_out=1):
         )[self._slice]
 
     @_dask_cudf_nvtx_annotate
-    @_check_groupby_supported
+    @_check_groupby_optimized
     def collect(self, split_every=None, split_out=1):
         return groupby_agg(
             self.obj,
@@ -407,7 +403,7 @@ def collect(self, split_every=None, split_out=1):
         )[self._slice]
 
     @_dask_cudf_nvtx_annotate
-    @_check_groupby_supported
+    @_check_groupby_optimized
     def first(self, split_every=None, split_out=1):
         return groupby_agg(
             self.obj,
@@ -422,7 +418,7 @@ def first(self, split_every=None, split_out=1):
         )[self._slice]
 
     @_dask_cudf_nvtx_annotate
-    @_check_groupby_supported
+    @_check_groupby_optimized
     def last(self, split_every=None, split_out=1):
         return groupby_agg(
             self.obj,
@@ -446,7 +442,7 @@ def aggregate(self, arg, split_every=None, split_out=1, shuffle=None):
         if not isinstance(arg, dict):
             arg = {self._slice: arg}
 
-        if _groupby_supported(self) and _aggs_supported(arg, SUPPORTED_AGGS):
+        if _groupby_optimized(self) and _aggs_optimized(arg, OPTIMIZED_AGGS):
             return groupby_agg(
                 self.obj,
                 self.by,
@@ -569,9 +565,9 @@ def groupby_agg(
     """
     # Assert that aggregations are supported
     aggs = _redirect_aggs(aggs_in)
-    if not _aggs_supported(aggs, SUPPORTED_AGGS):
+    if not _aggs_optimized(aggs, OPTIMIZED_AGGS):
         raise ValueError(
-            f"Supported aggs include {SUPPORTED_AGGS} for groupby_agg API. "
+            f"Supported aggs include {OPTIMIZED_AGGS} for groupby_agg API. "
             f"Aggregations must be specified with dict or list syntax."
         )
 
@@ -735,7 +731,7 @@ def _redirect_aggs(arg):
 
 
 @_dask_cudf_nvtx_annotate
-def _aggs_supported(arg, supported: set):
+def _aggs_optimized(arg, supported: set):
     """Check that aggregations in `arg` are a subset of `supported`"""
     if isinstance(arg, (list, dict)):
         if isinstance(arg, dict):
@@ -757,8 +753,8 @@ def _aggs_supported(arg, supported: set):
 
 
 @_dask_cudf_nvtx_annotate
-def _groupby_supported(gb):
-    """Check that groupby input is supported by dask-cudf"""
+def _groupby_optimized(gb):
+    """Check that groupby input can use dask-cudf optimized codepath"""
     return isinstance(gb.obj, DaskDataFrame) and (
         isinstance(gb.by, str)
         or (isinstance(gb.by, list) and all(isinstance(x, str) for x in gb.by))
@@ -830,7 +826,7 @@ def _tree_node_agg(df, gb_cols, dropna, sort, sep):
         agg = col.split(sep)[-1]
         if agg in ("count", "sum"):
             agg_dict[col] = ["sum"]
-        elif agg in SUPPORTED_AGGS:
+        elif agg in OPTIMIZED_AGGS:
             agg_dict[col] = [agg]
         else:
             raise ValueError(f"Unexpected aggregation: {agg}")
diff --git a/python/dask_cudf/dask_cudf/tests/test_groupby.py b/python/dask_cudf/dask_cudf/tests/test_groupby.py
index f2047c34684..e43fead0b63 100644
--- a/python/dask_cudf/dask_cudf/tests/test_groupby.py
+++ b/python/dask_cudf/dask_cudf/tests/test_groupby.py
@@ -6,16 +6,28 @@
 
 import dask
 from dask import dataframe as dd
+from dask.utils_test import hlg_layer
 
 import cudf
 from cudf.core._compat import PANDAS_GE_120
 
 import dask_cudf
-from dask_cudf.groupby import AGGS, CUMULATIVE_AGGS, _aggs_supported
+from dask_cudf.groupby import OPTIMIZED_AGGS, _aggs_optimized
 
 
-@pytest.fixture
-def pdf():
+def assert_cudf_groupby_layers(ddf):
+    for prefix in ("cudf-aggregate-chunk", "cudf-aggregate-agg"):
+        try:
+            hlg_layer(ddf.dask, prefix)
+        except KeyError:
+            raise AssertionError(
+                "Expected Dask dataframe to contain groupby layer with "
+                f"prefix {prefix}"
+            )
+
+
+@pytest.fixture(params=["non_null", "null"])
+def pdf(request):
     np.random.seed(0)
 
     # note that column name "x" is a substring of the groupby key;
@@ -27,13 +39,17 @@ def pdf():
             "y": np.random.normal(size=10000),
         }
     )
+
+    # insert nulls into dataframe at random
+    if request.param == "null":
+        pdf = pdf.mask(np.random.choice([True, False], size=pdf.shape))
+
     return pdf
 
 
-@pytest.mark.parametrize("aggregation", AGGS)
+@pytest.mark.parametrize("aggregation", OPTIMIZED_AGGS)
 @pytest.mark.parametrize("series", [False, True])
 def test_groupby_basic(series, aggregation, pdf):
-
     gdf = cudf.DataFrame.from_pandas(pdf)
     gdf_grouped = gdf.groupby("xx")
     ddf_grouped = dask_cudf.from_cudf(gdf, npartitions=5).groupby("xx")
@@ -42,30 +58,38 @@ def test_groupby_basic(series, aggregation, pdf):
         gdf_grouped = gdf_grouped.xx
         ddf_grouped = ddf_grouped.xx
 
-    a = getattr(gdf_grouped, aggregation)()
-    b = getattr(ddf_grouped, aggregation)().compute()
+    check_dtype = aggregation != "count"
 
-    if aggregation == "count":
-        dd.assert_eq(a, b, check_dtype=False)
-    else:
-        dd.assert_eq(a, b)
+    expect = getattr(gdf_grouped, aggregation)()
+    actual = getattr(ddf_grouped, aggregation)()
 
-    a = gdf_grouped.agg({"xx": aggregation})
-    b = ddf_grouped.agg({"xx": aggregation}).compute()
+    assert_cudf_groupby_layers(actual)
 
-    if aggregation == "count":
-        dd.assert_eq(a, b, check_dtype=False)
-    else:
-        dd.assert_eq(a, b)
+    dd.assert_eq(expect, actual, check_dtype=check_dtype)
+
+    expect = gdf_grouped.agg({"xx": aggregation})
+    actual = ddf_grouped.agg({"xx": aggregation})
+
+    assert_cudf_groupby_layers(actual)
+
+    dd.assert_eq(expect, actual, check_dtype=check_dtype)
 
 
+# TODO: explore adding support with `.agg()`
 @pytest.mark.parametrize("series", [True, False])
-@pytest.mark.parametrize("aggregation", CUMULATIVE_AGGS)
+@pytest.mark.parametrize("aggregation", ["cumsum", "cumcount"])
 def test_groupby_cumulative(aggregation, pdf, series):
     gdf = cudf.DataFrame.from_pandas(pdf)
     ddf = dask_cudf.from_cudf(gdf, npartitions=5)
 
-    gdf_grouped = gdf.groupby("xx")
+    if pdf.isna().sum().any():
+        with pytest.xfail(
+            reason="https://github.com/rapidsai/cudf/issues/12055"
+        ):
+            gdf_grouped = gdf.groupby("xx")
+    else:
+        gdf_grouped = gdf.groupby("xx")
+
     ddf_grouped = ddf.groupby("xx")
 
     if series:
@@ -73,7 +97,7 @@ def test_groupby_cumulative(aggregation, pdf, series):
         ddf_grouped = ddf_grouped.xx
 
     a = getattr(gdf_grouped, aggregation)()
-    b = getattr(ddf_grouped, aggregation)().compute()
+    b = getattr(ddf_grouped, aggregation)()
 
     if aggregation == "cumsum" and series:
         with pytest.xfail(reason="https://github.com/dask/dask/issues/9313"):
@@ -82,37 +106,31 @@ def test_groupby_cumulative(aggregation, pdf, series):
         dd.assert_eq(a, b)
 
 
+@pytest.mark.parametrize("aggregation", OPTIMIZED_AGGS)
 @pytest.mark.parametrize(
     "func",
     [
-        lambda df: df.groupby("x").agg({"y": "max"}),
-        lambda df: df.groupby("x").agg(["sum", "max"]),
-        lambda df: df.groupby("x").y.agg(["sum", "max"]),
-        lambda df: df.groupby("x").agg("sum"),
-        lambda df: df.groupby("x").y.agg("sum"),
+        lambda df, agg: df.groupby("xx").agg({"y": agg}),
+        lambda df, agg: df.groupby("xx").y.agg({"y": agg}),
+        lambda df, agg: df.groupby("xx").agg([agg]),
+        lambda df, agg: df.groupby("xx").y.agg([agg]),
+        lambda df, agg: df.groupby("xx").agg(agg),
+        lambda df, agg: df.groupby("xx").y.agg(agg),
     ],
 )
-def test_groupby_agg(func):
-    pdf = pd.DataFrame(
-        {
-            "x": np.random.randint(0, 5, size=10000),
-            "y": np.random.normal(size=10000),
-        }
-    )
-
+def test_groupby_agg(func, aggregation, pdf):
     gdf = cudf.DataFrame.from_pandas(pdf)
 
     ddf = dask_cudf.from_cudf(gdf, npartitions=5)
 
-    a = func(gdf).to_pandas()
-    b = func(ddf).compute().to_pandas()
+    actual = func(ddf, aggregation)
+    expect = func(gdf, aggregation)
 
-    a.index.name = None
-    a.name = None
-    b.index.name = None
-    b.name = None
+    check_dtype = aggregation != "count"
 
-    dd.assert_eq(a, b)
+    assert_cudf_groupby_layers(actual)
+
+    dd.assert_eq(expect, actual, check_names=False, check_dtype=check_dtype)
 
 
 @pytest.mark.parametrize("split_out", [1, 3])
@@ -136,28 +154,6 @@ def test_groupby_agg_empty_partition(tmpdir, split_out):
     dd.assert_eq(gb.compute().sort_index(), expect)
 
 
-@pytest.mark.parametrize(
-    "func",
-    [lambda df: df.groupby("x").std(), lambda df: df.groupby("x").y.std()],
-)
-def test_groupby_std(func):
-    pdf = pd.DataFrame(
-        {
-            "x": np.random.randint(0, 5, size=10000),
-            "y": np.random.normal(size=10000),
-        }
-    )
-
-    gdf = cudf.DataFrame.from_pandas(pdf)
-
-    ddf = dask_cudf.from_cudf(gdf, npartitions=5)
-
-    a = func(gdf).to_pandas()
-    b = func(ddf).compute().to_pandas()
-
-    dd.assert_eq(a, b)
-
-
 # reason gotattr in cudf
 @pytest.mark.parametrize(
     "func",
@@ -710,7 +706,7 @@ def test_groupby_agg_redirect(aggregations):
     ],
 )
 def test_is_supported(arg, supported):
-    assert _aggs_supported(arg, AGGS) is supported
+    assert _aggs_optimized(arg, OPTIMIZED_AGGS) is supported
 
 
 def test_groupby_unique_lists():
@@ -746,22 +742,20 @@ def test_groupby_first_last(data, agg):
     gddf = dask_cudf.from_cudf(gdf, npartitions=2)
 
     dd.assert_eq(
-        ddf.groupby("a").agg(agg).compute(),
-        gddf.groupby("a").agg(agg).compute(),
+        ddf.groupby("a").agg(agg),
+        gddf.groupby("a").agg(agg),
     )
 
     dd.assert_eq(
-        getattr(ddf.groupby("a"), agg)().compute(),
-        getattr(gddf.groupby("a"), agg)().compute(),
+        getattr(ddf.groupby("a"), agg)(),
+        getattr(gddf.groupby("a"), agg)(),
     )
 
-    dd.assert_eq(
-        gdf.groupby("a").agg(agg), gddf.groupby("a").agg(agg).compute()
-    )
+    dd.assert_eq(gdf.groupby("a").agg(agg), gddf.groupby("a").agg(agg))
 
     dd.assert_eq(
         getattr(gdf.groupby("a"), agg)(),
-        getattr(gddf.groupby("a"), agg)().compute(),
+        getattr(gddf.groupby("a"), agg)(),
     )
 
 
From f9a25129238713f2e632e5cb0fcba53a7c34a090 Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Mon, 7 Nov 2022 18:21:27 -0500
Subject: [PATCH 133/202] Fix quantile gtests coded in namespace cudf::test
 (#12049)

Fixes the `cpp/tests/quantiles` gtest source files that were coded in namespace `cudf::test`.
The `tdigest_utilities.cu` file was moved to `cpp/tests/utilities` since it is used by the quantiles, groupby, and reductions tests. Also, the header for the functions defined in this source file is in `cpp/include/cudf_test/`.

The `cpp/include/cudf_test/tdigest_utilities.cuh` header was also including a source file header from `cpp/tests/groupby`, which seemed odd and was corrected by moving the code it needed directly into the `tdigest_utilities.cuh` header. These functions were used by quantiles, groupby, reductions, etc., so it made sense for them to be moved into this utility header.

Simply reworking some of the code in `percentile_approx_test.cu` allowed it to become a `.cpp` file as well.
Also made some minor changes to the `tdigest_column_view` class to isolate a functor inside the class instead of the namespace scope.

No function or test has changed; the source code was just reworked or moved around.

Reference #11734

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Nghia Truong (https://github.com/ttnghia)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/12049
---
 cpp/CMakeLists.txt                            |   2 +-
 .../cudf/tdigest/tdigest_column_view.cuh      |  36 ++-
 cpp/include/cudf_test/tdigest_utilities.cuh   |  58 ++++-
 .../quantiles/tdigest/tdigest_column_view.cpp |   4 +-
 cpp/tests/CMakeLists.txt                      |   2 +-
 cpp/tests/groupby/groupby_test_util.hpp       |  54 +----
 ...rox_test.cu => percentile_approx_test.cpp} | 207 +++++++++---------
 cpp/tests/quantiles/quantile_test.cpp         | 176 +++++++--------
 cpp/tests/quantiles/quantiles_test.cpp        | 127 +++++------
 .../tdigest_utilities.cu                      |   0
 10 files changed, 323 insertions(+), 343 deletions(-)
 rename cpp/tests/quantiles/{percentile_approx_test.cu => percentile_approx_test.cpp} (69%)
 rename cpp/tests/{quantiles => utilities}/tdigest_utilities.cu (100%)

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 03cf4c7d2b7..75de15bdf22 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -690,10 +690,10 @@ add_library(cudf::cudf ALIAS cudf)
 add_library(
   cudftestutil STATIC
   tests/io/metadata_utilities.cpp
-  tests/quantiles/tdigest_utilities.cu
   tests/utilities/base_fixture.cpp
   tests/utilities/column_utilities.cu
   tests/utilities/table_utilities.cu
+  tests/utilities/tdigest_utilities.cu
 )
 
 set_target_properties(
diff --git a/cpp/include/cudf/tdigest/tdigest_column_view.cuh b/cpp/include/cudf/tdigest/tdigest_column_view.cuh
index 64371fd5c45..0ffd9578126 100644
--- a/cpp/include/cudf/tdigest/tdigest_column_view.cuh
+++ b/cpp/include/cudf/tdigest/tdigest_column_view.cuh
@@ -22,24 +22,6 @@
 namespace cudf {
 namespace tdigest {
 
-/**
- * @brief Functor to compute the size of each tdigest of a column.
- *
- */
-struct tdigest_size {
-  size_type const* offsets;  ///< Offsets of the t-digest column
-  /**
-   * @brief Returns size of the each tdigest in the column
-   *
-   * @param tdigest_index Index of the tdigest in the column
-   * @return Size of the tdigest
-   */
-  __device__ size_type operator()(size_type tdigest_index)
-  {
-    return offsets[tdigest_index + 1] - offsets[tdigest_index];
-  }
-};
-
 /**
  * @brief Given a column_view containing tdigest data, an instance of this class
  * provides a wrapper on the compound column for tdigest operations.
@@ -127,6 +109,22 @@ class tdigest_column_view : private column_view {
    */
   [[nodiscard]] column_view weights() const;
 
+  /**
+   * @brief Functor to compute the size of each tdigest of a column.
+   */
+  struct tdigest_size_fn {
+    size_type const* offsets;  ///< Offsets of the t-digest column
+    /**
+     * @brief Returns size of the each tdigest in the column
+     *
+     * @param tdigest_index Index of the tdigest in the column
+     * @return Size of the tdigest
+     */
+    __device__ size_type operator()(size_type tdigest_index)
+    {
+      return offsets[tdigest_index + 1] - offsets[tdigest_index];
+    }
+  };
   /**
    * @brief Returns an iterator that returns the size of each tdigest
    * in the column (each row is 1 digest)
@@ -136,7 +134,7 @@ class tdigest_column_view : private column_view {
   [[nodiscard]] auto size_begin() const
   {
     return cudf::detail::make_counting_transform_iterator(
-      0, tdigest_size{centroids().offsets_begin()});
+      0, tdigest_size_fn{centroids().offsets_begin()});
   }
 
   /**
diff --git a/cpp/include/cudf_test/tdigest_utilities.cuh b/cpp/include/cudf_test/tdigest_utilities.cuh
index 250f8ea8580..11a8d8715a0 100644
--- a/cpp/include/cudf_test/tdigest_utilities.cuh
+++ b/cpp/include/cudf_test/tdigest_utilities.cuh
@@ -16,16 +16,14 @@
 
 #pragma once
 
+#include <cudf_test/column_wrapper.hpp>
+
 #include <cudf/detail/tdigest/tdigest.hpp>
 #include <cudf/detail/unary.hpp>
 #include <cudf/groupby.hpp>
 #include <cudf/tdigest/tdigest_column_view.cuh>
 #include <cudf/utilities/default_stream.hpp>
 
-#include <cudf_test/column_wrapper.hpp>
-
-#include <tests/groupby/groupby_test_util.hpp>
-
 #include <thrust/device_vector.h>
 #include <thrust/execution_policy.h>
 #include <thrust/extrema.h>
@@ -102,6 +100,58 @@ struct tdigest_gen {
   // @endcond
 };
 
+template <typename T>
+inline T frand()
+{
+  return static_cast<T>(rand()) / static_cast<T>(RAND_MAX);
+}
+
+template <typename T>
+inline T rand_range(T min, T max)
+{
+  return min + static_cast<T>(frand<T>() * (max - min));
+}
+
+inline std::unique_ptr<column> generate_typed_percentile_distribution(
+  std::vector<double> const& buckets,
+  std::vector<int> const& sizes,
+  data_type t,
+  bool sorted = false)
+{
+  srand(0);
+
+  std::vector<double> values;
+  size_t total_size = std::reduce(sizes.begin(), sizes.end(), 0);
+  values.reserve(total_size);
+  for (size_t idx = 0; idx < sizes.size(); idx++) {
+    double min = idx == 0 ? 0.0f : buckets[idx - 1];
+    double max = buckets[idx];
+
+    for (int v_idx = 0; v_idx < sizes[idx]; v_idx++) {
+      values.push_back(rand_range(min, max));
+    }
+  }
+
+  if (sorted) { std::sort(values.begin(), values.end()); }
+
+  cudf::test::fixed_width_column_wrapper<double> src(values.begin(), values.end());
+  return cudf::cast(src, t);
+}
+
+// "standardized" means the parameters sent into generate_typed_percentile_distribution. the intent
+// is to provide a standardized set of inputs for use with tdigest generation tests and
+// percentile_approx tests. std::vector<double>
+// buckets{10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0}; std::vector<int>
+// sizes{50000, 50000, 50000, 50000, 50000, 100000, 100000, 100000, 100000, 100000};
+inline std::unique_ptr<column> generate_standardized_percentile_distribution(
+  data_type t = data_type{type_id::FLOAT64}, bool sorted = false)
+{
+  std::vector<double> buckets{10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0, 90.0f, 100.0f};
+  std::vector<int> b_sizes{
+    50000, 50000, 50000, 50000, 50000, 100000, 100000, 100000, 100000, 100000};
+  return generate_typed_percentile_distribution(buckets, b_sizes, t, sorted);
+}
+
 /**
  * @brief Compare a tdigest column against a sampling of expected values.
  */
diff --git a/cpp/src/quantiles/tdigest/tdigest_column_view.cpp b/cpp/src/quantiles/tdigest/tdigest_column_view.cpp
index a86b40fd64a..df95c1d9da8 100644
--- a/cpp/src/quantiles/tdigest/tdigest_column_view.cpp
+++ b/cpp/src/quantiles/tdigest/tdigest_column_view.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -22,8 +22,6 @@
 namespace cudf {
 namespace tdigest {
 
-using namespace cudf;
-
 tdigest_column_view::tdigest_column_view(column_view const& col) : column_view(col)
 {
   // sanity check that this is actually tdigest data
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index 8675dc891c1..5ff2e9bf6d6 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -146,7 +146,7 @@ ConfigureTest(HASH_MAP_TEST hash_map/map_test.cu)
 # ##################################################################################################
 # * quantiles tests -------------------------------------------------------------------------------
 ConfigureTest(
-  QUANTILES_TEST quantiles/percentile_approx_test.cu quantiles/quantile_test.cpp
+  QUANTILES_TEST quantiles/percentile_approx_test.cpp quantiles/quantile_test.cpp
   quantiles/quantiles_test.cpp
 )
 
diff --git a/cpp/tests/groupby/groupby_test_util.hpp b/cpp/tests/groupby/groupby_test_util.hpp
index b333d9dacba..83f522ed913 100644
--- a/cpp/tests/groupby/groupby_test_util.hpp
+++ b/cpp/tests/groupby/groupby_test_util.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -131,57 +131,5 @@ inline void test_single_scan(column_view const& keys,
     expect_vals, *result.second[0].results[0], debug_output_level::ALL_ERRORS);
 }
 
-template <typename T>
-inline T frand()
-{
-  return static_cast<T>(rand()) / static_cast<T>(RAND_MAX);
-}
-
-template <typename T>
-inline T rand_range(T min, T max)
-{
-  return min + static_cast<T>(frand<T>() * (max - min));
-}
-
-inline std::unique_ptr<column> generate_typed_percentile_distribution(
-  std::vector<double> const& buckets,
-  std::vector<int> const& sizes,
-  data_type t,
-  bool sorted = false)
-{
-  srand(0);
-
-  std::vector<double> values;
-  size_t total_size = std::reduce(sizes.begin(), sizes.end(), 0);
-  values.reserve(total_size);
-  for (size_t idx = 0; idx < sizes.size(); idx++) {
-    double min = idx == 0 ? 0.0f : buckets[idx - 1];
-    double max = buckets[idx];
-
-    for (int v_idx = 0; v_idx < sizes[idx]; v_idx++) {
-      values.push_back(rand_range(min, max));
-    }
-  }
-
-  if (sorted) { std::sort(values.begin(), values.end()); }
-
-  cudf::test::fixed_width_column_wrapper<double> src(values.begin(), values.end());
-  return cudf::cast(src, t);
-}
-
-// "standardized" means the parameters sent into generate_typed_percentile_distribution. the intent
-// is to provide a standardized set of inputs for use with tdigest generation tests and
-// percentile_approx tests. std::vector<double>
-// buckets{10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0, 100.0}; std::vector<int>
-// sizes{50000, 50000, 50000, 50000, 50000, 100000, 100000, 100000, 100000, 100000};
-inline std::unique_ptr<column> generate_standardized_percentile_distribution(
-  data_type t = data_type{type_id::FLOAT64}, bool sorted = false)
-{
-  std::vector<double> buckets{10.0f, 20.0f, 30.0f, 40.0f, 50.0f, 60.0f, 70.0f, 80.0, 90.0f, 100.0f};
-  std::vector<int> b_sizes{
-    50000, 50000, 50000, 50000, 50000, 100000, 100000, 100000, 100000, 100000};
-  return generate_typed_percentile_distribution(buckets, b_sizes, t, sorted);
-}
-
 }  // namespace test
 }  // namespace cudf
diff --git a/cpp/tests/quantiles/percentile_approx_test.cu b/cpp/tests/quantiles/percentile_approx_test.cpp
similarity index 69%
rename from cpp/tests/quantiles/percentile_approx_test.cu
rename to cpp/tests/quantiles/percentile_approx_test.cpp
index b02b7d6c336..c7db8894a23 100644
--- a/cpp/tests/quantiles/percentile_approx_test.cu
+++ b/cpp/tests/quantiles/percentile_approx_test.cpp
@@ -13,56 +13,47 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include <arrow/util/tdigest.h>
+
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/tdigest_utilities.cuh>
+#include <cudf_test/type_list_utilities.hpp>
+#include <cudf_test/type_lists.hpp>
 
 #include <cudf/detail/tdigest/tdigest.hpp>
-#include <cudf/detail/valid_if.cuh>
 #include <cudf/groupby.hpp>
 #include <cudf/quantiles.hpp>
 #include <cudf/reduction.hpp>
+#include <cudf/sorting.hpp>
 #include <cudf/tdigest/tdigest_column_view.cuh>
 #include <cudf/transform.hpp>
-#include <cudf/unary.hpp>
 #include <cudf/utilities/default_stream.hpp>
+#include <cudf/utilities/error.hpp>
 
-#include <cudf_test/base_fixture.hpp>
-#include <cudf_test/column_wrapper.hpp>
-#include <cudf_test/type_list_utilities.hpp>
-#include <cudf_test/type_lists.hpp>
-
-#include <rmm/exec_policy.hpp>
-
-#include <tests/groupby/groupby_test_util.hpp>
-
-#include <thrust/fill.h>
-#include <thrust/iterator/counting_iterator.h>
-#include <thrust/transform.h>
-
-using namespace cudf;
-using namespace cudf::tdigest;
+#include <arrow/util/tdigest.h>
 
-std::unique_ptr<column> arrow_percentile_approx(column_view const& _values,
-                                                int delta,
-                                                std::vector<double> const& percentages)
+std::unique_ptr<cudf::column> arrow_percentile_approx(cudf::column_view const& _values,
+                                                      int delta,
+                                                      std::vector<double> const& percentages)
 {
   // sort the incoming values using the same settings that groupby does.
   // this is a little weak because null_order::AFTER is hardcoded internally to groupby.
-  table_view t({_values});
-  auto sorted_t      = cudf::sort(t, {}, {null_order::AFTER});
+  cudf::table_view t({_values});
+  auto sorted_t      = cudf::sort(t, {}, {cudf::null_order::AFTER});
   auto sorted_values = sorted_t->get_column(0).view();
 
   std::vector<double> h_values(sorted_values.size());
-  cudaMemcpy(h_values.data(),
-             sorted_values.data<double>(),
-             sizeof(double) * sorted_values.size(),
-             cudaMemcpyDeviceToHost);
+  CUDF_CUDA_TRY(cudaMemcpy(h_values.data(),
+                           sorted_values.data<double>(),
+                           sizeof(double) * sorted_values.size(),
+                           cudaMemcpyDeviceToHost));
   std::vector<char> h_validity(sorted_values.size());
   if (sorted_values.null_mask() != nullptr) {
     auto validity = cudf::mask_to_bools(sorted_values.null_mask(), 0, sorted_values.size());
-    cudaMemcpy(h_validity.data(),
-               (validity->view().data<char>()),
-               sizeof(char) * sorted_values.size(),
-               cudaMemcpyDeviceToHost);
+    CUDF_CUDA_TRY(cudaMemcpy(h_validity.data(),
+                             (validity->view().data<char>()),
+                             sizeof(char) * sorted_values.size(),
+                             cudaMemcpyDeviceToHost));
   }
 
   // generate the tdigest
@@ -79,8 +70,8 @@ std::unique_ptr<column> arrow_percentile_approx(column_view const& _values,
       return atd.Quantile(p);
     });
   cudf::test::fixed_width_column_wrapper<double> result(h_result.begin(), h_result.end());
-  cudf::test::fixed_width_column_wrapper<size_type> offsets{
-    0, static_cast<size_type>(percentages.size())};
+  cudf::test::fixed_width_column_wrapper<cudf::size_type> offsets{
+    0, static_cast<cudf::size_type>(percentages.size())};
   return cudf::make_lists_column(1, offsets.release(), result.release(), 0, {});
 }
 
@@ -89,18 +80,18 @@ struct percentile_approx_dispatch {
     typename T,
     typename Func,
     typename std::enable_if_t<cudf::is_numeric<T>() || cudf::is_fixed_point<T>()>* = nullptr>
-  std::unique_ptr<column> operator()(Func op,
-                                     column_view const& values,
-                                     int delta,
-                                     std::vector<double> const& percentages,
-                                     size_type ulps)
+  std::unique_ptr<cudf::column> operator()(Func op,
+                                           cudf::column_view const& values,
+                                           int delta,
+                                           std::vector<double> const& percentages,
+                                           cudf::size_type ulps)
   {
     // arrow implementation.
     auto expected = [&]() {
       // we're explicitly casting back to doubles here but this is ok because that is
       // exactly what happens inside of the cudf implementation as values are processed as well. so
       // this should not affect results.
-      auto as_doubles = cudf::cast(values, data_type{type_id::FLOAT64});
+      auto as_doubles = cudf::cast(values, cudf::data_type{cudf::type_id::FLOAT64});
       return arrow_percentile_approx(*as_doubles, delta, percentages);
     }();
 
@@ -109,7 +100,7 @@ struct percentile_approx_dispatch {
 
     cudf::test::fixed_width_column_wrapper<double> g_percentages(percentages.begin(),
                                                                  percentages.end());
-    tdigest_column_view tdv(*agg_result);
+    cudf::tdigest::tdigest_column_view tdv(*agg_result);
     auto result = cudf::percentile_approx(tdv, g_percentages);
 
     cudf::test::expect_columns_equivalent(
@@ -122,21 +113,21 @@ struct percentile_approx_dispatch {
     typename T,
     typename Func,
     typename std::enable_if_t<!cudf::is_numeric<T>() && !cudf::is_fixed_point<T>()>* = nullptr>
-  std::unique_ptr<column> operator()(Func op,
-                                     column_view const& values,
-                                     int delta,
-                                     std::vector<double> const& percentages,
-                                     size_type ulps)
+  std::unique_ptr<cudf::column> operator()(Func op,
+                                           cudf::column_view const& values,
+                                           int delta,
+                                           std::vector<double> const& percentages,
+                                           cudf::size_type ulps)
   {
     CUDF_FAIL("Invalid input type for percentile_approx test");
   }
 };
 
-void percentile_approx_test(column_view const& _keys,
-                            column_view const& _values,
+void percentile_approx_test(cudf::column_view const& _keys,
+                            cudf::column_view const& _values,
                             int delta,
                             std::vector<double> const& percentages,
-                            size_type ulps)
+                            cudf::size_type ulps)
 {
   // first pass:  validate the actual percentages we get per group.
 
@@ -146,8 +137,8 @@ void percentile_approx_test(column_view const& _keys,
   cudf::table_view v({_values});
   auto groups = pass1_gb.get_groups(v);
   // slice it all up so we have keys/columns for everything.
-  std::vector<column_view> keys;
-  std::vector<column_view> values;
+  std::vector<cudf::column_view> keys;
+  std::vector<cudf::column_view> values;
   for (size_t idx = 0; idx < groups.offsets.size() - 1; idx++) {
     auto k =
       cudf::slice(groups.keys->get_column(0), {groups.offsets[idx], groups.offsets[idx + 1]});
@@ -158,11 +149,11 @@ void percentile_approx_test(column_view const& _keys,
     values.push_back(v[0]);
   }
 
-  std::vector<std::unique_ptr<column>> groupby_parts;
-  std::vector<std::unique_ptr<column>> reduce_parts;
+  std::vector<std::unique_ptr<cudf::column>> groupby_parts;
+  std::vector<std::unique_ptr<cudf::column>> reduce_parts;
   for (size_t idx = 0; idx < values.size(); idx++) {
     // via groupby
-    auto groupby = [&](column_view const& values, int delta) {
+    auto groupby = [&](cudf::column_view const& values, int delta) {
       cudf::table_view t({keys[idx]});
       cudf::groupby::groupby gb(t);
       std::vector<cudf::groupby::aggregation_request> requests;
@@ -180,12 +171,12 @@ void percentile_approx_test(column_view const& _keys,
                                                   ulps));
 
     // via reduce
-    auto reduce = [](column_view const& values, int delta) {
+    auto reduce = [](cudf::column_view const& values, int delta) {
       // result is a scalar, but we want to extract out the underlying column
       auto scalar_result =
         cudf::reduce(values,
                      *cudf::make_tdigest_aggregation<cudf::reduce_aggregation>(delta),
-                     data_type{type_id::STRUCT});
+                     cudf::data_type{cudf::type_id::STRUCT});
       auto tbl = static_cast<cudf::struct_scalar const*>(scalar_result.get())->view();
       std::vector<std::unique_ptr<cudf::column>> cols;
       std::transform(
@@ -206,11 +197,11 @@ void percentile_approx_test(column_view const& _keys,
 
   // second pass. run the percentile_approx with all the keys in one pass and make sure we get the
   // same results as the concatenated by-key results.
-  std::vector<column_view> part_views;
+  std::vector<cudf::column_view> part_views;
   std::transform(groupby_parts.begin(),
                  groupby_parts.end(),
                  std::back_inserter(part_views),
-                 [](std::unique_ptr<column> const& c) { return c->view(); });
+                 [](std::unique_ptr<cudf::column> const& c) { return c->view(); });
   auto expected = cudf::concatenate(part_views);
 
   cudf::groupby::groupby gb(k);
@@ -222,22 +213,20 @@ void percentile_approx_test(column_view const& _keys,
 
   cudf::test::fixed_width_column_wrapper<double> g_percentages(percentages.begin(),
                                                                percentages.end());
-  tdigest_column_view tdv(*(gb_result.second[0].results[0]));
+  cudf::tdigest::tdigest_column_view tdv(*(gb_result.second[0].results[0]));
   auto result = cudf::percentile_approx(tdv, g_percentages);
 
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*expected, *result);
 }
 
-void simple_test(data_type input_type, std::vector<std::pair<int, int>> params)
+void simple_test(cudf::data_type input_type, std::vector<std::pair<int, int>> params)
 {
   auto values = cudf::test::generate_standardized_percentile_distribution(input_type);
   // all in the same group
   auto keys = cudf::make_fixed_width_column(
-    data_type{type_id::INT32}, values->size(), mask_state::UNALLOCATED);
-  thrust::fill(rmm::exec_policy(cudf::get_default_stream()),
-               keys->mutable_view().template begin<int>(),
-               keys->mutable_view().template end<int>(),
-               0);
+    cudf::data_type{cudf::type_id::INT32}, values->size(), cudf::mask_state::UNALLOCATED);
+  CUDF_CUDA_TRY(
+    cudaMemset(keys->mutable_view().data<int32_t>(), 0, values->size() * sizeof(int32_t)));
 
   // runs both groupby and reduce paths
   std::for_each(params.begin(), params.end(), [&](std::pair<int, int> const& params) {
@@ -247,21 +236,22 @@ void simple_test(data_type input_type, std::vector<std::pair<int, int>> params)
 }
 
 struct group_index {
-  __device__ int operator()(int i) { return i / 150000; }
+  int32_t operator()(int32_t i) { return i / 150000; }
 };
 
-void grouped_test(data_type input_type, std::vector<std::pair<int, int>> params)
+void grouped_test(cudf::data_type input_type, std::vector<std::pair<int, int>> params)
 {
   auto values = cudf::test::generate_standardized_percentile_distribution(input_type);
   // all in the same group
   auto keys = cudf::make_fixed_width_column(
-    data_type{type_id::INT32}, values->size(), mask_state::UNALLOCATED);
-  auto i = thrust::make_counting_iterator(0);
-  thrust::transform(rmm::exec_policy(cudf::get_default_stream()),
-                    i,
-                    i + values->size(),
-                    keys->mutable_view().template begin<int>(),
-                    group_index{});
+    cudf::data_type{cudf::type_id::INT32}, values->size(), cudf::mask_state::UNALLOCATED);
+  auto i      = thrust::make_counting_iterator(0);
+  auto h_keys = std::vector<int32_t>(values->size());
+  std::transform(i, i + values->size(), h_keys.begin(), group_index{});
+  CUDF_CUDA_TRY(cudaMemcpy(keys->mutable_view().data<int32_t>(),
+                           h_keys.data(),
+                           h_keys.size() * sizeof(int32_t),
+                           cudaMemcpyHostToDevice));
 
   std::for_each(params.begin(), params.end(), [&](std::pair<int, int> const& params) {
     percentile_approx_test(
@@ -269,25 +259,21 @@ void grouped_test(data_type input_type, std::vector<std::pair<int, int>> params)
   });
 }
 
-std::pair<rmm::device_buffer, size_type> make_null_mask(column_view const& col)
+std::pair<rmm::device_buffer, cudf::size_type> make_null_mask(cudf::column_view const& col)
 {
-  return cudf::detail::valid_if(
-    thrust::make_counting_iterator<size_type>(0),
-    thrust::make_counting_iterator<size_type>(col.size()),
-    [] __device__(size_type i) { return i % 2 == 0; },
-    cudf::get_default_stream());
+  auto itr  = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; });
+  auto mask = cudf::test::detail::make_null_mask(itr, itr + col.size());
+  return std::make_pair(std::move(mask), col.size() / 2);
 }
 
-void simple_with_nulls_test(data_type input_type, std::vector<std::pair<int, int>> params)
+void simple_with_nulls_test(cudf::data_type input_type, std::vector<std::pair<int, int>> params)
 {
   auto values = cudf::test::generate_standardized_percentile_distribution(input_type);
   // all in the same group
   auto keys = cudf::make_fixed_width_column(
-    data_type{type_id::INT32}, values->size(), mask_state::UNALLOCATED);
-  thrust::fill(rmm::exec_policy(cudf::get_default_stream()),
-               keys->mutable_view().template begin<int>(),
-               keys->mutable_view().template end<int>(),
-               0);
+    cudf::data_type{cudf::type_id::INT32}, values->size(), cudf::mask_state::UNALLOCATED);
+  CUDF_CUDA_TRY(
+    cudaMemset(keys->mutable_view().data<int32_t>(), 0, values->size() * sizeof(int32_t)));
 
   // add a null mask
   auto mask = make_null_mask(*values);
@@ -299,18 +285,19 @@ void simple_with_nulls_test(data_type input_type, std::vector<std::pair<int, int
   });
 }
 
-void grouped_with_nulls_test(data_type input_type, std::vector<std::pair<int, int>> params)
+void grouped_with_nulls_test(cudf::data_type input_type, std::vector<std::pair<int, int>> params)
 {
   auto values = cudf::test::generate_standardized_percentile_distribution(input_type);
   // all in the same group
   auto keys = cudf::make_fixed_width_column(
-    data_type{type_id::INT32}, values->size(), mask_state::UNALLOCATED);
-  auto i = thrust::make_counting_iterator(0);
-  thrust::transform(rmm::exec_policy(cudf::get_default_stream()),
-                    i,
-                    i + values->size(),
-                    keys->mutable_view().template begin<int>(),
-                    group_index{});
+    cudf::data_type{cudf::type_id::INT32}, values->size(), cudf::mask_state::UNALLOCATED);
+  auto i      = thrust::make_counting_iterator(0);
+  auto h_keys = std::vector<int32_t>(values->size());
+  std::transform(i, i + values->size(), h_keys.begin(), group_index{});
+  CUDF_CUDA_TRY(cudaMemcpy(keys->mutable_view().data<int32_t>(),
+                           h_keys.data(),
+                           h_keys.size() * sizeof(int32_t),
+                           cudaMemcpyHostToDevice));
 
   // add a null mask
   auto mask = make_null_mask(*values);
@@ -323,10 +310,10 @@ void grouped_with_nulls_test(data_type input_type, std::vector<std::pair<int, in
 }
 
 template <typename T>
-data_type get_appropriate_type()
+cudf::data_type get_appropriate_type()
 {
-  if constexpr (cudf::is_fixed_point<T>()) { return data_type{cudf::type_to_id<T>(), -7}; }
-  return data_type{cudf::type_to_id<T>()};
+  if constexpr (cudf::is_fixed_point<T>()) { return cudf::data_type{cudf::type_to_id<T>(), -7}; }
+  return cudf::data_type{cudf::type_to_id<T>()};
 }
 
 using PercentileApproxTypes =
@@ -389,21 +376,21 @@ TEST_F(PercentileApproxTest, EmptyInput)
   auto empty_ = cudf::detail::tdigest::make_empty_tdigest_column(cudf::get_default_stream());
   cudf::test::fixed_width_column_wrapper<double> percentiles{0.0, 0.25, 0.3};
 
-  std::vector<column_view> input;
+  std::vector<cudf::column_view> input;
   input.push_back(*empty_);
   input.push_back(*empty_);
   input.push_back(*empty_);
   auto empty = cudf::concatenate(input);
 
-  tdigest_column_view tdv(*empty);
+  cudf::tdigest::tdigest_column_view tdv(*empty);
   auto result = cudf::percentile_approx(tdv, percentiles);
 
-  cudf::test::fixed_width_column_wrapper<offset_type> offsets{0, 0, 0, 0};
+  cudf::test::fixed_width_column_wrapper<cudf::offset_type> offsets{0, 0, 0, 0};
   std::vector<bool> nulls{0, 0, 0};
   auto expected =
     cudf::make_lists_column(3,
                             offsets.release(),
-                            cudf::make_empty_column(type_id::FLOAT64),
+                            cudf::make_empty_column(cudf::type_id::FLOAT64),
                             3,
                             cudf::test::detail::make_null_mask(nulls.begin(), nulls.end()));
 
@@ -426,16 +413,18 @@ TEST_F(PercentileApproxTest, EmptyPercentiles)
 
   cudf::test::fixed_width_column_wrapper<double> percentiles{};
 
-  tdigest_column_view tdv(*tdigest_column.second[0].results[0]);
+  cudf::tdigest::tdigest_column_view tdv(*tdigest_column.second[0].results[0]);
   auto result = cudf::percentile_approx(tdv, percentiles);
 
-  cudf::test::fixed_width_column_wrapper<offset_type> offsets{0, 0, 0};
-  auto expected = cudf::make_lists_column(
-    2,
-    offsets.release(),
-    cudf::make_empty_column(type_id::FLOAT64),
-    2,
-    cudf::detail::create_null_mask(2, mask_state::ALL_NULL, cudf::get_default_stream()));
+  cudf::test::fixed_width_column_wrapper<cudf::offset_type> offsets{0, 0, 0};
+  std::vector<bool> nulls{0, 0};
+  auto expected =
+    cudf::make_lists_column(2,
+                            offsets.release(),
+                            cudf::make_empty_column(cudf::type_id::FLOAT64),
+                            2,
+                            cudf::test::detail::make_null_mask(nulls.begin(), nulls.end()));
+  // cudf::detail::create_null_mask(2, cudf::mask_state::ALL_NULL, cudf::get_default_stream()));
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, *expected);
 }
@@ -454,7 +443,7 @@ TEST_F(PercentileApproxTest, NullPercentiles)
   requests.push_back({values, std::move(aggregations)});
   auto tdigest_column = gb.aggregate(requests);
 
-  tdigest_column_view tdv(*tdigest_column.second[0].results[0]);
+  cudf::tdigest::tdigest_column_view tdv(*tdigest_column.second[0].results[0]);
 
   cudf::test::fixed_width_column_wrapper<double> npercentiles{{0.5, 0.5, 1.0, 1.0}, {0, 0, 1, 1}};
   auto result = cudf::percentile_approx(tdv, npercentiles);
diff --git a/cpp/tests/quantiles/quantile_test.cpp b/cpp/tests/quantiles/quantile_test.cpp
index 20acdd02a93..6dfe4f5169b 100644
--- a/cpp/tests/quantiles/quantile_test.cpp
+++ b/cpp/tests/quantiles/quantile_test.cpp
@@ -14,26 +14,21 @@
  * limitations under the License.
  */
 
-#include <cudf/quantiles.hpp>
-#include <cudf/scalar/scalar.hpp>
-#include <cudf/table/table_view.hpp>
-#include <cudf/types.hpp>
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
 #include <cudf_test/type_list_utilities.hpp>
 #include <cudf_test/type_lists.hpp>
+
+#include <cudf/quantiles.hpp>
+#include <cudf/table/table_view.hpp>
+#include <cudf/types.hpp>
+
 #include <limits>
 #include <memory>
 #include <type_traits>
 #include <vector>
 
-using namespace cudf::test;
-
-using cudf::null_order;
-using cudf::order;
-using std::vector;
-
 namespace {
 struct q_res {
   q_res(double value, bool is_valid = true) : is_valid(is_valid), value(value) {}
@@ -77,9 +72,9 @@ struct q_expect {
 
 template <typename T>
 struct test_case {
-  fixed_width_column_wrapper<T> column;
-  vector<q_expect> expectations;
-  fixed_width_column_wrapper<cudf::size_type> ordered_indices;
+  cudf::test::fixed_width_column_wrapper<T> column;
+  std::vector<q_expect> expectations;
+  cudf::test::fixed_width_column_wrapper<cudf::size_type> ordered_indices;
 };
 
 // interpolate_center
@@ -104,7 +99,7 @@ test_case<T> interpolate_center()
   }();
   auto max_d = static_cast<double>(max);
   auto low_d = static_cast<double>(low);
-  return test_case<T>{fixed_width_column_wrapper<T>({low, max}),
+  return test_case<T>{cudf::test::fixed_width_column_wrapper<T>({low, max}),
                       {q_expect{0.50, max_d, low_d, lin_d, mid_d, low_d}}};
 }
 
@@ -116,7 +111,7 @@ test_case<bool> interpolate_center()
   auto mid_d = 0.5;
   auto low_d = static_cast<double>(low);
   auto max_d = static_cast<double>(max);
-  return test_case<bool>{fixed_width_column_wrapper<bool>({low, max}),
+  return test_case<bool>{cudf::test::fixed_width_column_wrapper<bool>({low, max}),
                          {q_expect{0.5, max_d, low_d, mid_d, mid_d, low_d}}};
 }
 
@@ -130,7 +125,7 @@ test_case<T> interpolate_extrema_high()
   auto low_d   = static_cast<double>(low);
   auto max_d   = static_cast<double>(max);
   auto exact_d = static_cast<double>(max - 1);
-  return test_case<T>{fixed_width_column_wrapper<T>({low, max}),
+  return test_case<T>{cudf::test::fixed_width_column_wrapper<T>({low, max}),
                       {q_expect{0.50, max_d, low_d, exact_d, exact_d, low_d}}};
 }
 
@@ -151,7 +146,7 @@ test_case<T> interpolate_extrema_low()
   auto a_d     = static_cast<double>(a);
   auto b_d     = static_cast<double>(b);
   auto exact_d = static_cast<double>(a + 1);
-  return test_case<T>{fixed_width_column_wrapper<T>({a, b}),
+  return test_case<T>{cudf::test::fixed_width_column_wrapper<T>({a, b}),
                       {q_expect{0.50, b_d, a_d, exact_d, exact_d, a_d}}};
 }
 
@@ -166,7 +161,7 @@ test_case<bool> interpolate_extrema_low<bool>()
 template <typename T>
 std::enable_if_t<std::is_floating_point_v<T>, test_case<T>> single()
 {
-  return test_case<T>{fixed_width_column_wrapper<T>({7.309999942779541}),
+  return test_case<T>{cudf::test::fixed_width_column_wrapper<T>({7.309999942779541}),
                       {
                         q_expect{
                           -1.0,
@@ -198,13 +193,15 @@ std::enable_if_t<std::is_floating_point_v<T>, test_case<T>> single()
 template <typename T>
 std::enable_if_t<std::is_integral_v<T> and not cudf::is_boolean<T>(), test_case<T>> single()
 {
-  return test_case<T>{fixed_width_column_wrapper<T>({1}), {q_expect{0.7, 1, 1, 1, 1, 1}}};
+  return test_case<T>{cudf::test::fixed_width_column_wrapper<T>({1}),
+                      {q_expect{0.7, 1, 1, 1, 1, 1}}};
 }
 
 template <typename T>
 std::enable_if_t<cudf::is_boolean<T>(), test_case<T>> single()
 {
-  return test_case<T>{fixed_width_column_wrapper<T>({1}), {q_expect{0.7, 1.0, 1.0, 1.0, 1.0, 1.0}}};
+  return test_case<T>{cudf::test::fixed_width_column_wrapper<T>({1}),
+                      {q_expect{0.7, 1.0, 1.0, 1.0, 1.0, 1.0}}};
 }
 
 // all_invalid
@@ -213,25 +210,25 @@ template <typename T>
 std::enable_if_t<std::is_floating_point_v<T>, test_case<T>> all_invalid()
 {
   return test_case<T>{
-    fixed_width_column_wrapper<T>({6.8, 0.15, 3.4, 4.17, 2.13, 1.11, -1.01, 0.8, 5.7},
-                                  {0, 0, 0, 0, 0, 0, 0, 0, 0}),
+    cudf::test::fixed_width_column_wrapper<T>({6.8, 0.15, 3.4, 4.17, 2.13, 1.11, -1.01, 0.8, 5.7},
+                                              {0, 0, 0, 0, 0, 0, 0, 0, 0}),
     {q_expect{-1.0}, q_expect{0.0}, q_expect{0.5}, q_expect{1.0}, q_expect{2.0}}};
 }
 
 template <typename T>
 std::enable_if_t<std::is_integral_v<T> and not cudf::is_boolean<T>(), test_case<T>> all_invalid()
 {
-  return test_case<T>{
-    fixed_width_column_wrapper<T>({6, 0, 3, 4, 2, 1, -1, 1, 6}, {0, 0, 0, 0, 0, 0, 0, 0, 0}),
-    {q_expect{0.7}}};
+  return test_case<T>{cudf::test::fixed_width_column_wrapper<T>({6, 0, 3, 4, 2, 1, -1, 1, 6},
+                                                                {0, 0, 0, 0, 0, 0, 0, 0, 0}),
+                      {q_expect{0.7}}};
 }
 
 template <typename T>
 std::enable_if_t<cudf::is_boolean<T>(), test_case<T>> all_invalid()
 {
-  return test_case<T>{
-    fixed_width_column_wrapper<T>({1, 0, 1, 1, 0, 1, 0, 1, 1}, {0, 0, 0, 0, 0, 0, 0, 0, 0}),
-    {q_expect{0.7}}};
+  return test_case<T>{cudf::test::fixed_width_column_wrapper<T>({1, 0, 1, 1, 0, 1, 0, 1, 1},
+                                                                {0, 0, 0, 0, 0, 0, 0, 0, 0}),
+                      {q_expect{0.7}}};
 }
 
 // some invalid
@@ -244,14 +241,14 @@ std::enable_if_t<std::is_same_v<T, double>, test_case<T>> some_invalid()
   T mid  = -0.432;
   T lin  = -0.432;
   return test_case<T>{
-    fixed_width_column_wrapper<T>({6.8, high, 3.4, 4.17, 2.13, 1.11, low, 0.8, 5.7},
-                                  {0, 1, 0, 0, 0, 0, 1, 0, 0}),
+    cudf::test::fixed_width_column_wrapper<T>({6.8, high, 3.4, 4.17, 2.13, 1.11, low, 0.8, 5.7},
+                                              {0, 1, 0, 0, 0, 0, 1, 0, 0}),
     {q_expect{-1.0, low, low, low, low, low},
      q_expect{0.0, low, low, low, low, low},
      q_expect{0.5, high, low, lin, mid, low},
      q_expect{1.0, high, high, high, high, high},
      q_expect{2.0, high, high, high, high, high}},
-    fixed_width_column_wrapper<cudf::size_type>({6, 1})};
+    cudf::test::fixed_width_column_wrapper<cudf::size_type>({6, 1})};
 }
 
 template <typename T>
@@ -261,7 +258,7 @@ std::enable_if_t<std::is_same_v<T, float>, test_case<T>> some_invalid()
   T low      = -1.024;
   double mid = -0.43200002610683441;
   double lin = -0.43200002610683441;
-  return test_case<T>{fixed_width_column_wrapper<T>(
+  return test_case<T>{cudf::test::fixed_width_column_wrapper<T>(
                         {T(6.8), high, T(3.4), T(4.17), T(2.13), T(1.11), low, T(0.8), T(5.7)},
                         {0, 1, 0, 0, 0, 0, 1, 0, 0}),
                       {q_expect{-1.0, low, low, low, low, low},
@@ -269,29 +266,29 @@ std::enable_if_t<std::is_same_v<T, float>, test_case<T>> some_invalid()
                        q_expect{0.5, high, low, lin, mid, low},
                        q_expect{1.0, high, high, high, high, high},
                        q_expect{2.0, high, high, high, high, high}},
-                      fixed_width_column_wrapper<cudf::size_type>({6, 1})};
+                      cudf::test::fixed_width_column_wrapper<cudf::size_type>({6, 1})};
 }
 
 template <typename T>
 std::enable_if_t<std::is_integral_v<T> and not cudf::is_boolean<T>(), test_case<T>> some_invalid()
 {
-  return test_case<T>{
-    fixed_width_column_wrapper<T>({6, 0, 3, 4, 2, 1, -1, 1, 6}, {0, 0, 1, 0, 0, 0, 0, 0, 1}),
-    {q_expect{0.0, 3.0, 3.0, 3.0, 3.0, 3.0},
-     q_expect{0.5, 6.0, 3.0, 4.5, 4.5, 3.0},
-     q_expect{1.0, 6.0, 6.0, 6.0, 6.0, 6.0}},
-    fixed_width_column_wrapper<cudf::size_type>({2, 8})};
+  return test_case<T>{cudf::test::fixed_width_column_wrapper<T>({6, 0, 3, 4, 2, 1, -1, 1, 6},
+                                                                {0, 0, 1, 0, 0, 0, 0, 0, 1}),
+                      {q_expect{0.0, 3.0, 3.0, 3.0, 3.0, 3.0},
+                       q_expect{0.5, 6.0, 3.0, 4.5, 4.5, 3.0},
+                       q_expect{1.0, 6.0, 6.0, 6.0, 6.0, 6.0}},
+                      cudf::test::fixed_width_column_wrapper<cudf::size_type>({2, 8})};
 }
 
 template <typename T>
 std::enable_if_t<cudf::is_boolean<T>(), test_case<T>> some_invalid()
 {
-  return test_case<T>{
-    fixed_width_column_wrapper<T>({1, 0, 1, 1, 0, 1, 0, 1, 1}, {0, 0, 1, 0, 1, 0, 0, 0, 0}),
-    {q_expect{0.0, 0.0, 0.0, 0.0, 0.0, 0.0},
-     q_expect{0.5, 1.0, 0.0, 0.5, 0.5, 0.0},
-     q_expect{1.0, 1.0, 1.0, 1.0, 1.0, 1.0}},
-    fixed_width_column_wrapper<cudf::size_type>({4, 2})};
+  return test_case<T>{cudf::test::fixed_width_column_wrapper<T>({1, 0, 1, 1, 0, 1, 0, 1, 1},
+                                                                {0, 0, 1, 0, 1, 0, 0, 0, 0}),
+                      {q_expect{0.0, 0.0, 0.0, 0.0, 0.0, 0.0},
+                       q_expect{0.5, 1.0, 0.0, 0.5, 0.5, 0.0},
+                       q_expect{1.0, 1.0, 1.0, 1.0, 1.0, 1.0}},
+                      cudf::test::fixed_width_column_wrapper<cudf::size_type>({4, 2})};
 }
 
 // unsorted
@@ -300,38 +297,41 @@ template <typename T>
 std::enable_if_t<std::is_floating_point_v<T>, test_case<T>> unsorted()
 {
   return test_case<T>{
-    fixed_width_column_wrapper<T>({6.8, 0.15, 3.4, 4.17, 2.13, 1.11, -1.00, 0.8, 5.7}),
+    cudf::test::fixed_width_column_wrapper<T>({6.8, 0.15, 3.4, 4.17, 2.13, 1.11, -1.00, 0.8, 5.7}),
     {
       q_expect{0.0, -1.00, -1.00, -1.00, -1.00, -1.00},
     },
-    fixed_width_column_wrapper<cudf::size_type>({6, 1, 7, 5, 4, 2, 3, 8, 0})};
+    cudf::test::fixed_width_column_wrapper<cudf::size_type>({6, 1, 7, 5, 4, 2, 3, 8, 0})};
 }
 
 template <typename T>
 std::enable_if_t<std::is_integral_v<T> and not cudf::is_boolean<T>(), test_case<T>> unsorted()
 {
   return std::is_signed<T>()
-           ? test_case<T>{fixed_width_column_wrapper<T>({6, 0, 3, 4, 2, 1, -1, 1, 6}),
+           ? test_case<T>{cudf::test::fixed_width_column_wrapper<T>({6, 0, 3, 4, 2, 1, -1, 1, 6}),
                           {q_expect{0.0, -1, -1, -1, -1, -1}},
-                          fixed_width_column_wrapper<cudf::size_type>({6, 1, 7, 5, 4, 2, 3, 8, 0})}
-           : test_case<T>{fixed_width_column_wrapper<T>({6, 0, 3, 4, 2, 1, 1, 1, 6}),
+                          cudf::test::fixed_width_column_wrapper<cudf::size_type>(
+                            {6, 1, 7, 5, 4, 2, 3, 8, 0})}
+           : test_case<T>{cudf::test::fixed_width_column_wrapper<T>({6, 0, 3, 4, 2, 1, 1, 1, 6}),
                           {q_expect{0.0, 1, 1, 1, 1, 1}},
-                          fixed_width_column_wrapper<cudf::size_type>({6, 1, 7, 5, 4, 2, 3, 8, 0})};
+                          cudf::test::fixed_width_column_wrapper<cudf::size_type>(
+                            {6, 1, 7, 5, 4, 2, 3, 8, 0})};
 }
 
 template <typename T>
 std::enable_if_t<cudf::is_boolean<T>(), test_case<T>> unsorted()
 {
-  return test_case<T>{fixed_width_column_wrapper<T>({0, 0, 1, 1, 0, 1, 1, 0, 1}),
-                      {q_expect{
-                        0.0,
-                        0.0,
-                        0.0,
-                        0.0,
-                        0.0,
-                        0.0,
-                      }},
-                      fixed_width_column_wrapper<cudf::size_type>({0, 1, 4, 7, 2, 3, 5, 6, 9})};
+  return test_case<T>{
+    cudf::test::fixed_width_column_wrapper<T>({0, 0, 1, 1, 0, 1, 1, 0, 1}),
+    {q_expect{
+      0.0,
+      0.0,
+      0.0,
+      0.0,
+      0.0,
+      0.0,
+    }},
+    cudf::test::fixed_width_column_wrapper<cudf::size_type>({0, 1, 4, 7, 2, 3, 5, 6, 9})};
 }
 
 }  // namespace testdata
@@ -342,40 +342,39 @@ std::enable_if_t<cudf::is_boolean<T>(), test_case<T>> unsorted()
 template <typename T>
 void test(testdata::test_case<T> test_case)
 {
-  using namespace cudf;
-
   for (auto& expected : test_case.expectations) {
     auto q = std::vector<double>{expected.quantile};
 
     auto nullable = static_cast<cudf::column_view>(test_case.column).nullable();
 
     auto make_expected_column = [nullable](q_res expected) {
-      return nullable ? fixed_width_column_wrapper<double>({expected.value}, {expected.is_valid})
-                      : fixed_width_column_wrapper<double>({expected.value});
+      return nullable ? cudf::test::fixed_width_column_wrapper<double>({expected.value},
+                                                                       {expected.is_valid})
+                      : cudf::test::fixed_width_column_wrapper<double>({expected.value});
     };
 
     auto actual_higher =
-      quantile(test_case.column, q, interpolation::HIGHER, test_case.ordered_indices);
+      cudf::quantile(test_case.column, q, cudf::interpolation::HIGHER, test_case.ordered_indices);
     auto expected_higher_col = make_expected_column(expected.higher);
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_higher_col, actual_higher->view());
 
     auto actual_lower =
-      quantile(test_case.column, q, interpolation::LOWER, test_case.ordered_indices);
+      cudf::quantile(test_case.column, q, cudf::interpolation::LOWER, test_case.ordered_indices);
     auto expected_lower_col = make_expected_column(expected.lower);
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_lower_col, actual_lower->view());
 
     auto actual_linear =
-      quantile(test_case.column, q, interpolation::LINEAR, test_case.ordered_indices);
+      cudf::quantile(test_case.column, q, cudf::interpolation::LINEAR, test_case.ordered_indices);
     auto expected_linear_col = make_expected_column(expected.linear);
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_linear_col, actual_linear->view());
 
     auto actual_midpoint =
-      quantile(test_case.column, q, interpolation::MIDPOINT, test_case.ordered_indices);
+      cudf::quantile(test_case.column, q, cudf::interpolation::MIDPOINT, test_case.ordered_indices);
     auto expected_midpoint_col = make_expected_column(expected.midpoint);
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_midpoint_col, actual_midpoint->view());
 
     auto actual_nearest =
-      quantile(test_case.column, q, interpolation::NEAREST, test_case.ordered_indices);
+      cudf::quantile(test_case.column, q, cudf::interpolation::NEAREST, test_case.ordered_indices);
     auto expected_nearest_col = make_expected_column(expected.nearest);
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_nearest_col, actual_nearest->view());
   }
@@ -385,10 +384,10 @@ void test(testdata::test_case<T> test_case)
 // ----- tests -----------------------------------------------------------------
 
 template <typename T>
-struct QuantileTest : public BaseFixture {
+struct QuantileTest : public cudf::test::BaseFixture {
 };
 
-using TestTypes = NumericTypes;
+using TestTypes = cudf::test::NumericTypes;
 TYPED_TEST_SUITE(QuantileTest, TestTypes);
 
 TYPED_TEST(QuantileTest, TestSingle) { test(testdata::single<TypeParam>()); }
@@ -413,60 +412,65 @@ TYPED_TEST(QuantileTest, TestInterpolateExtremaLow)
 
 TYPED_TEST(QuantileTest, TestEmpty)
 {
-  auto input    = fixed_width_column_wrapper<TypeParam>({});
+  auto input    = cudf::test::fixed_width_column_wrapper<TypeParam>({});
   auto expected = cudf::test::fixed_width_column_wrapper<double>({0, 0}, {0, 0});
   auto actual   = cudf::quantile(input, {0.5, 0.25});
 }
 
 template <typename T>
-struct QuantileUnsupportedTypesTest : public BaseFixture {
+struct QuantileUnsupportedTypesTest : public cudf::test::BaseFixture {
 };
 
 // TODO add tests for FixedPointTypes
-using UnsupportedTestTypes = RemoveIf<ContainedIn<Concat<TestTypes, FixedPointTypes>>, AllTypes>;
+using UnsupportedTestTypes = cudf::test::RemoveIf<
+  cudf::test::ContainedIn<cudf::test::Concat<TestTypes, cudf::test::FixedPointTypes>>,
+  cudf::test::AllTypes>;
 TYPED_TEST_SUITE(QuantileUnsupportedTypesTest, UnsupportedTestTypes);
 
 TYPED_TEST(QuantileUnsupportedTypesTest, TestZeroElements)
 {
-  fixed_width_column_wrapper<TypeParam> input({});
+  cudf::test::fixed_width_column_wrapper<TypeParam> input({});
 
   EXPECT_THROW(cudf::quantile(input, {0}), cudf::logic_error);
 }
 
 TYPED_TEST(QuantileUnsupportedTypesTest, TestOneElements)
 {
-  fixed_width_column_wrapper<TypeParam, int32_t> input({0});
+  cudf::test::fixed_width_column_wrapper<TypeParam, int32_t> input({0});
 
   EXPECT_THROW(cudf::quantile(input, {0}), cudf::logic_error);
 }
 
 TYPED_TEST(QuantileUnsupportedTypesTest, TestMultipleElements)
 {
-  fixed_width_column_wrapper<TypeParam, int32_t> input({0, 1, 2});
+  cudf::test::fixed_width_column_wrapper<TypeParam, int32_t> input({0, 1, 2});
 
   EXPECT_THROW(cudf::quantile(input, {0}), cudf::logic_error);
 }
 
-struct QuantileDictionaryTest : public BaseFixture {
+struct QuantileDictionaryTest : public cudf::test::BaseFixture {
 };
 
 TEST_F(QuantileDictionaryTest, TestValid)
 {
-  dictionary_column_wrapper<int32_t> col{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
-  fixed_width_column_wrapper<int32_t> indices{0, 2, 4, 6, 8, 1, 3, 5, 7, 9};
+  cudf::test::dictionary_column_wrapper<int32_t> col{1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
+  cudf::test::fixed_width_column_wrapper<int32_t> indices{0, 2, 4, 6, 8, 1, 3, 5, 7, 9};
 
   auto result = cudf::quantile(col, {0.5}, cudf::interpolation::LINEAR);
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), fixed_width_column_wrapper<double>{5.5});
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(),
+                                      cudf::test::fixed_width_column_wrapper<double>{5.5});
 
   result = cudf::quantile(col, {0.5}, cudf::interpolation::LINEAR, indices);
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), fixed_width_column_wrapper<double>{5.5});
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(),
+                                      cudf::test::fixed_width_column_wrapper<double>{5.5});
 
   result = cudf::quantile(col, {0.1, 0.2}, cudf::interpolation::HIGHER);
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), fixed_width_column_wrapper<double>{2.0, 3.0});
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(),
+                                      cudf::test::fixed_width_column_wrapper<double>{2.0, 3.0});
 
   result = cudf::quantile(col, {0.25, 0.5, 0.75}, cudf::interpolation::MIDPOINT);
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(),
-                                      fixed_width_column_wrapper<double>{3.5, 5.5, 7.5});
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(
+    result->view(), cudf::test::fixed_width_column_wrapper<double>{3.5, 5.5, 7.5});
 };
 
 }  // anonymous namespace
diff --git a/cpp/tests/quantiles/quantiles_test.cpp b/cpp/tests/quantiles/quantiles_test.cpp
index b4d1b9984ab..f532e93c6c2 100644
--- a/cpp/tests/quantiles/quantiles_test.cpp
+++ b/cpp/tests/quantiles/quantiles_test.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -20,52 +20,45 @@
 #include <cudf_test/table_utilities.hpp>
 #include <cudf_test/type_lists.hpp>
 
-#include <cudf/table/table.hpp>
-#include <cudf/table/table_view.hpp>
-#include <cudf/types.hpp>
-
 #include <cudf/column/column_view.hpp>
-#include <cudf/copying.hpp>
 #include <cudf/quantiles.hpp>
-#include <cudf/utilities/error.hpp>
-
-using namespace cudf;
-using namespace test;
+#include <cudf/table/table_view.hpp>
+#include <cudf/types.hpp>
 
 template <typename T>
-struct QuantilesTest : public BaseFixture {
+struct QuantilesTest : public cudf::test::BaseFixture {
 };
 
-using TestTypes = AllTypes;
+using TestTypes = cudf::test::AllTypes;
 
 TYPED_TEST_SUITE(QuantilesTest, TestTypes);
 
 TYPED_TEST(QuantilesTest, TestZeroColumns)
 {
-  auto input = table_view(std::vector<column_view>{});
+  auto input = cudf::table_view(std::vector<cudf::column_view>{});
 
-  EXPECT_THROW(quantiles(input, {0.0f}), logic_error);
+  EXPECT_THROW(cudf::quantiles(input, {0.0f}), cudf::logic_error);
 }
 
 TYPED_TEST(QuantilesTest, TestMultiColumnZeroRows)
 {
   using T = TypeParam;
 
-  fixed_width_column_wrapper<T> input_a({});
-  auto input = table_view({input_a});
+  cudf::test::fixed_width_column_wrapper<T> input_a({});
+  auto input = cudf::table_view({input_a});
 
-  EXPECT_THROW(quantiles(input, {0.0f}), logic_error);
+  EXPECT_THROW(cudf::quantiles(input, {0.0f}), cudf::logic_error);
 }
 
 TYPED_TEST(QuantilesTest, TestZeroRequestedQuantiles)
 {
   using T = TypeParam;
 
-  fixed_width_column_wrapper<T, int32_t> input_a({1}, {1});
-  auto input = table_view(std::vector<column_view>{input_a});
+  cudf::test::fixed_width_column_wrapper<T, int32_t> input_a({1}, {1});
+  auto input = cudf::table_view(std::vector<cudf::column_view>{input_a});
 
-  auto actual   = quantiles(input, {});
-  auto expected = empty_like(input);
+  auto actual   = cudf::quantiles(input, {});
+  auto expected = cudf::empty_like(input);
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), actual->view());
 }
@@ -74,75 +67,75 @@ TYPED_TEST(QuantilesTest, TestMultiColumnOrderCountMismatch)
 {
   using T = TypeParam;
 
-  fixed_width_column_wrapper<T> input_a({});
-  fixed_width_column_wrapper<T> input_b({});
-  auto input = table_view({input_a});
-
-  EXPECT_THROW(quantiles(input,
-                         {0.0f},
-                         interpolation::NEAREST,
-                         sorted::NO,
-                         {order::ASCENDING},
-                         {null_order::AFTER, null_order::AFTER}),
-               logic_error);
+  cudf::test::fixed_width_column_wrapper<T> input_a({});
+  cudf::test::fixed_width_column_wrapper<T> input_b({});
+  auto input = cudf::table_view({input_a});
+
+  EXPECT_THROW(cudf::quantiles(input,
+                               {0.0f},
+                               cudf::interpolation::NEAREST,
+                               cudf::sorted::NO,
+                               {cudf::order::ASCENDING},
+                               {cudf::null_order::AFTER, cudf::null_order::AFTER}),
+               cudf::logic_error);
 }
 
 TYPED_TEST(QuantilesTest, TestMultiColumnNullOrderCountMismatch)
 {
   using T = TypeParam;
 
-  fixed_width_column_wrapper<T> input_a({});
-  fixed_width_column_wrapper<T> input_b({});
-  auto input = table_view({input_a});
-
-  EXPECT_THROW(quantiles(input,
-                         {0.0f},
-                         interpolation::NEAREST,
-                         sorted::NO,
-                         {order::ASCENDING, order::ASCENDING},
-                         {null_order::AFTER}),
-               logic_error);
+  cudf::test::fixed_width_column_wrapper<T> input_a({});
+  cudf::test::fixed_width_column_wrapper<T> input_b({});
+  auto input = cudf::table_view({input_a});
+
+  EXPECT_THROW(cudf::quantiles(input,
+                               {0.0f},
+                               cudf::interpolation::NEAREST,
+                               cudf::sorted::NO,
+                               {cudf::order::ASCENDING, cudf::order::ASCENDING},
+                               {cudf::null_order::AFTER}),
+               cudf::logic_error);
 }
 
 TYPED_TEST(QuantilesTest, TestMultiColumnArithmeticInterpolation)
 {
   using T = TypeParam;
 
-  fixed_width_column_wrapper<T> input_a({});
-  fixed_width_column_wrapper<T> input_b({});
-  auto input = table_view({input_a});
+  cudf::test::fixed_width_column_wrapper<T> input_a({});
+  cudf::test::fixed_width_column_wrapper<T> input_b({});
+  auto input = cudf::table_view({input_a});
 
-  EXPECT_THROW(quantiles(input, {0.0f}, interpolation::LINEAR), logic_error);
+  EXPECT_THROW(cudf::quantiles(input, {0.0f}, cudf::interpolation::LINEAR), cudf::logic_error);
 
-  EXPECT_THROW(quantiles(input, {0.0f}, interpolation::MIDPOINT), logic_error);
+  EXPECT_THROW(cudf::quantiles(input, {0.0f}, cudf::interpolation::MIDPOINT), cudf::logic_error);
 }
 
 TYPED_TEST(QuantilesTest, TestMultiColumnUnsorted)
 {
   using T = TypeParam;
 
-  auto input_a = strings_column_wrapper(
+  auto input_a = cudf::test::strings_column_wrapper(
     {"C", "B", "A", "A", "D", "B", "D", "B", "D", "C", "C", "C",
      "D", "B", "D", "B", "C", "C", "A", "D", "B", "A", "A", "A"},
     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
 
-  fixed_width_column_wrapper<T, int32_t> input_b(
+  cudf::test::fixed_width_column_wrapper<T, int32_t> input_b(
     {4, 3, 5, 0, 1, 0, 4, 1, 5, 3, 0, 5, 2, 4, 3, 2, 1, 2, 3, 0, 5, 1, 4, 2},
     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
 
-  auto input = table_view({input_a, input_b});
+  auto input = cudf::table_view({input_a, input_b});
 
-  auto actual = quantiles(input,
-                          {0.0f, 0.5f, 0.7f, 0.25f, 1.0f},
-                          interpolation::NEAREST,
-                          sorted::NO,
-                          {order::ASCENDING, order::DESCENDING});
+  auto actual = cudf::quantiles(input,
+                                {0.0f, 0.5f, 0.7f, 0.25f, 1.0f},
+                                cudf::interpolation::NEAREST,
+                                cudf::sorted::NO,
+                                {cudf::order::ASCENDING, cudf::order::DESCENDING});
 
-  auto expected_a = strings_column_wrapper({"A", "C", "C", "B", "D"}, {1, 1, 1, 1, 1});
+  auto expected_a = cudf::test::strings_column_wrapper({"A", "C", "C", "B", "D"}, {1, 1, 1, 1, 1});
 
-  fixed_width_column_wrapper<T, int32_t> expected_b({5, 5, 1, 5, 0}, {1, 1, 1, 1, 1});
+  cudf::test::fixed_width_column_wrapper<T, int32_t> expected_b({5, 5, 1, 5, 0}, {1, 1, 1, 1, 1});
 
-  auto expected = table_view({expected_a, expected_b});
+  auto expected = cudf::table_view({expected_a, expected_b});
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected, actual->view());
 }
@@ -151,25 +144,25 @@ TYPED_TEST(QuantilesTest, TestMultiColumnAssumedSorted)
 {
   using T = TypeParam;
 
-  auto input_a = strings_column_wrapper(
+  auto input_a = cudf::test::strings_column_wrapper(
     {"C", "B", "A", "A", "D", "B", "D", "B", "D", "C", "C", "C",
      "D", "B", "D", "B", "C", "C", "A", "D", "B", "A", "A", "A"},
     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
 
-  fixed_width_column_wrapper<T, int32_t> input_b(
+  cudf::test::fixed_width_column_wrapper<T, int32_t> input_b(
     {4, 3, 5, 0, 1, 0, 4, 1, 5, 3, 0, 5, 2, 4, 3, 2, 1, 2, 3, 0, 5, 1, 4, 2},
     {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
 
-  auto input = table_view({input_a, input_b});
+  auto input = cudf::table_view({input_a, input_b});
 
-  auto actual =
-    quantiles(input, {0.0f, 0.5f, 0.7f, 0.25f, 1.0f}, interpolation::NEAREST, sorted::YES);
+  auto actual = cudf::quantiles(
+    input, {0.0f, 0.5f, 0.7f, 0.25f, 1.0f}, cudf::interpolation::NEAREST, cudf::sorted::YES);
 
-  auto expected_a = strings_column_wrapper({"C", "D", "C", "D", "A"}, {1, 1, 1, 1, 1});
+  auto expected_a = cudf::test::strings_column_wrapper({"C", "D", "C", "D", "A"}, {1, 1, 1, 1, 1});
 
-  fixed_width_column_wrapper<T, int32_t> expected_b({4, 2, 1, 4, 2}, {1, 1, 1, 1, 1});
+  cudf::test::fixed_width_column_wrapper<T, int32_t> expected_b({4, 2, 1, 4, 2}, {1, 1, 1, 1, 1});
 
-  auto expected = table_view({expected_a, expected_b});
+  auto expected = cudf::table_view({expected_a, expected_b});
 
   CUDF_TEST_EXPECT_TABLES_EQUAL(expected, actual->view());
 }
diff --git a/cpp/tests/quantiles/tdigest_utilities.cu b/cpp/tests/utilities/tdigest_utilities.cu
similarity index 100%
rename from cpp/tests/quantiles/tdigest_utilities.cu
rename to cpp/tests/utilities/tdigest_utilities.cu

From a72627a1dd44c36b37f75ab3866fd46f1c764c47 Mon Sep 17 00:00:00 2001
From: Vukasin Milovanovic <vmilovanovic@nvidia.com>
Date: Mon, 7 Nov 2022 15:54:25 -0800
Subject: [PATCH 134/202] Throw an error when libcudf is built without cuFile
 and `LIBCUDF_CUFILE_POLICY` is set to `"ALWAYS"` (#12080)

Currently, creating a cufile `datasource` or `data_sink` silently fails if libcudf was built without the cuFile headers. This is expected behavior when the `LIBCUDF_CUFILE_POLICY` is not set, or is set to any value other than "ALWAYS". However, with "ALWAYS", there should be no fallback from GDS.
This PR adds a check to fail loudly when `LIBCUDF_CUFILE_POLICY=="ALWAYS"` cannot be enforced because of a missing dependency (cuFile).

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - Yunsong Wang (https://github.com/PointKernel)
  - David Wendt (https://github.com/davidwendt)
  - Mike Wilson (https://github.com/hyperbolic2346)

URL: https://github.com/rapidsai/cudf/pull/12080
---
 cpp/src/io/utilities/file_io_utilities.cpp | 14 ++++++++++----
 cpp/src/io/utilities/file_io_utilities.hpp |  2 ++
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/cpp/src/io/utilities/file_io_utilities.cpp b/cpp/src/io/utilities/file_io_utilities.cpp
index c0dd85702e2..2484a36143a 100644
--- a/cpp/src/io/utilities/file_io_utilities.cpp
+++ b/cpp/src/io/utilities/file_io_utilities.cpp
@@ -257,11 +257,20 @@ std::future<void> cufile_output_impl::write_async(void const* data, size_t offse
   // writes.
   return std::async(std::launch::deferred, waiter, std::move(slice_tasks));
 }
+#else
+cufile_input_impl::cufile_input_impl(std::string const& filepath)
+{
+  CUDF_FAIL("Cannot create cuFile source, current build was compiled without cuFile headers");
+}
+
+cufile_output_impl::cufile_output_impl(std::string const& filepath)
+{
+  CUDF_FAIL("Cannot create cuFile sink, current build was compiled without cuFile headers");
+}
 #endif
 
 std::unique_ptr<cufile_input_impl> make_cufile_input(std::string const& filepath)
 {
-#ifdef CUFILE_FOUND
   if (cufile_integration::is_gds_enabled()) {
     try {
       return std::make_unique<cufile_input_impl>(filepath);
@@ -269,13 +278,11 @@ std::unique_ptr<cufile_input_impl> make_cufile_input(std::string const& filepath
       if (cufile_integration::is_always_enabled()) throw;
     }
   }
-#endif
   return nullptr;
 }
 
 std::unique_ptr<cufile_output_impl> make_cufile_output(std::string const& filepath)
 {
-#ifdef CUFILE_FOUND
   if (cufile_integration::is_gds_enabled()) {
     try {
       return std::make_unique<cufile_output_impl>(filepath);
@@ -283,7 +290,6 @@ std::unique_ptr<cufile_output_impl> make_cufile_output(std::string const& filepa
       if (cufile_integration::is_always_enabled()) throw;
     }
   }
-#endif
   return nullptr;
 }
 
diff --git a/cpp/src/io/utilities/file_io_utilities.hpp b/cpp/src/io/utilities/file_io_utilities.hpp
index 704ee77de8a..38674892966 100644
--- a/cpp/src/io/utilities/file_io_utilities.hpp
+++ b/cpp/src/io/utilities/file_io_utilities.hpp
@@ -194,6 +194,7 @@ class cufile_output_impl final : public cufile_output {
 
 class cufile_input_impl final : public cufile_input {
  public:
+  cufile_input_impl(std::string const& filepath);
   std::future<size_t> read_async(size_t offset,
                                  size_t size,
                                  uint8_t* dst,
@@ -205,6 +206,7 @@ class cufile_input_impl final : public cufile_input {
 
 class cufile_output_impl final : public cufile_output {
  public:
+  cufile_output_impl(std::string const& filepath);
   std::future<void> write_async(void const* data, size_t offset, size_t size) override
   {
     CUDF_FAIL("Only used to compile without cufile library, should not be called");

From ec46e7f679ab34e17474ec4062bc61c89e0f376c Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Mon, 7 Nov 2022 17:58:37 -0600
Subject: [PATCH 135/202] Move and update `dask` nightly install in CI (#12082)

This PR updates `dask` nightly install to correctly install the packages.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Peter Andreas Entschev (https://github.com/pentschev)
  - Ray Douglass (https://github.com/raydouglass)

URL: https://github.com/rapidsai/cudf/pull/12082
---
 ci/benchmark/build.sh | 4 ++--
 ci/gpu/build.sh       | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/ci/benchmark/build.sh b/ci/benchmark/build.sh
index a8bc33e00bc..0ea39a0b365 100755
--- a/ci/benchmark/build.sh
+++ b/ci/benchmark/build.sh
@@ -82,8 +82,8 @@ conda install "rmm=$MINOR_VERSION.*" "cudatoolkit=$CUDA_REL" \
 
 # Install the conda-forge or nightly version of dask and distributed
 if [[ "${INSTALL_DASK_MAIN}" == 1 ]]; then
-    gpuci_logger "gpuci_mamba_retry update dask"
-    gpuci_mamba_retry update dask
+    gpuci_logger "gpuci_mamba_retry install -c dask/label/dev 'dask/label/dev::dask' 'dask/label/dev::distributed'"
+    gpuci_mamba_retry install -c dask/label/dev "dask/label/dev::dask" "dask/label/dev::distributed"
 else
     gpuci_logger "gpuci_mamba_retry install conda-forge::dask=={$DASK_STABLE_VERSION} conda-forge::distributed=={$DASK_STABLE_VERSION} conda-forge::dask-core=={$DASK_STABLE_VERSION} --force-reinstall"
     gpuci_mamba_retry install conda-forge::dask=={$DASK_STABLE_VERSION} conda-forge::distributed=={$DASK_STABLE_VERSION} conda-forge::dask-core=={$DASK_STABLE_VERSION} --force-reinstall
diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh
index 7d67efa77b1..500c3bdbcc5 100755
--- a/ci/gpu/build.sh
+++ b/ci/gpu/build.sh
@@ -96,8 +96,8 @@ function install_dask {
     gpuci_logger "Install the conda-forge or nightly version of dask and distributed"
     set -x
     if [[ "${INSTALL_DASK_MAIN}" == 1 ]]; then
-        gpuci_logger "gpuci_mamba_retry update dask"
-        gpuci_mamba_retry update dask
+        gpuci_logger "gpuci_mamba_retry install -c dask/label/dev 'dask/label/dev::dask' 'dask/label/dev::distributed'"
+        gpuci_mamba_retry install -c dask/label/dev "dask/label/dev::dask" "dask/label/dev::distributed"
         conda list
     else
         gpuci_logger "gpuci_mamba_retry install conda-forge::dask=={$DASK_STABLE_VERSION} conda-forge::distributed=={$DASK_STABLE_VERSION} conda-forge::dask-core=={$DASK_STABLE_VERSION} --force-reinstall"
@@ -111,6 +111,8 @@ function install_dask {
     set +x
 }
 
+install_dask
+
 if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then
 
     gpuci_logger "Install dependencies"
@@ -126,8 +128,6 @@ if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then
     # gpuci_conda_retry remove --force rapids-build-env rapids-notebook-env
     # gpuci_mamba_retry install -y "your-pkg=1.0.0"
 
-    install_dask
-
     ################################################################################
     # BUILD - Build libcudf, cuDF, libcudf_kafka, dask_cudf, and strings_udf from source
     ################################################################################

From 2ced21489d7b4043b39dad1d60adae9b3f77fd57 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Mon, 7 Nov 2022 17:59:16 -0600
Subject: [PATCH 136/202] Use nosync policy in gather and scatter
 implementations. (#12038)

This PR uses `rmm::exec_policy_nosync` in libcudf's gather and scatter functions. These changes are motivated by performance improvements seen previously in #11577.

# Checklist
- [x] I am familiar with the [Contributing Guidelines](https://github.com/rapidsai/cudf/blob/HEAD/CONTRIBUTING.md).
- [x] New or existing tests cover these changes.
- [x] The documentation is up to date with these changes.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Vukasin Milovanovic (https://github.com/vuule)
  - Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/12038
---
 cpp/include/cudf/detail/gather.cuh          |  4 ++--
 cpp/include/cudf/detail/scatter.cuh         | 12 ++++++------
 cpp/include/cudf/lists/detail/gather.cuh    |  4 ++--
 cpp/include/cudf/lists/detail/scatter.cuh   |  6 +++---
 cpp/include/cudf/strings/detail/gather.cuh  | 10 ++++++----
 cpp/include/cudf/strings/detail/scatter.cuh |  3 ++-
 cpp/src/copying/scatter.cu                  | 11 +++++++----
 cpp/src/lists/copying/scatter_helper.cu     |  6 +++---
 cpp/tests/copying/detail_gather_tests.cu    |  2 +-
 9 files changed, 32 insertions(+), 26 deletions(-)

diff --git a/cpp/include/cudf/detail/gather.cuh b/cpp/include/cudf/detail/gather.cuh
index 2acdc007afa..57d834e6277 100644
--- a/cpp/include/cudf/detail/gather.cuh
+++ b/cpp/include/cudf/detail/gather.cuh
@@ -128,7 +128,7 @@ void gather_helper(InputItr source_itr,
 {
   using map_type = typename std::iterator_traits<MapIterator>::value_type;
   if (nullify_out_of_bounds) {
-    thrust::gather_if(rmm::exec_policy(stream),
+    thrust::gather_if(rmm::exec_policy_nosync(stream),
                       gather_map_begin,
                       gather_map_end,
                       gather_map_begin,
@@ -137,7 +137,7 @@ void gather_helper(InputItr source_itr,
                       bounds_checker<map_type>{0, source_size});
   } else {
     thrust::gather(
-      rmm::exec_policy(stream), gather_map_begin, gather_map_end, source_itr, target_itr);
+      rmm::exec_policy_nosync(stream), gather_map_begin, gather_map_end, source_itr, target_itr);
   }
 }
 
diff --git a/cpp/include/cudf/detail/scatter.cuh b/cpp/include/cudf/detail/scatter.cuh
index ad5a2134afe..c8b17e22df2 100644
--- a/cpp/include/cudf/detail/scatter.cuh
+++ b/cpp/include/cudf/detail/scatter.cuh
@@ -79,14 +79,14 @@ auto scatter_to_gather(MapIterator scatter_map_begin,
   // We'll use the `numeric_limits::lowest()` value for this since it should always be outside the
   // valid range.
   auto gather_map = rmm::device_uvector<size_type>(gather_rows, stream);
-  thrust::uninitialized_fill(rmm::exec_policy(stream),
+  thrust::uninitialized_fill(rmm::exec_policy_nosync(stream),
                              gather_map.begin(),
                              gather_map.end(),
                              std::numeric_limits<size_type>::lowest());
 
   // Convert scatter map to a gather map
   thrust::scatter(
-    rmm::exec_policy(stream),
+    rmm::exec_policy_nosync(stream),
     thrust::make_counting_iterator<MapValueType>(0),
     thrust::make_counting_iterator<MapValueType>(std::distance(scatter_map_begin, scatter_map_end)),
     scatter_map_begin,
@@ -114,13 +114,13 @@ auto scatter_to_gather_complement(MapIterator scatter_map_begin,
                                   rmm::cuda_stream_view stream)
 {
   auto gather_map = rmm::device_uvector<size_type>(gather_rows, stream);
-  thrust::sequence(rmm::exec_policy(stream), gather_map.begin(), gather_map.end(), 0);
+  thrust::sequence(rmm::exec_policy_nosync(stream), gather_map.begin(), gather_map.end(), 0);
 
   auto const out_of_bounds_begin =
     thrust::make_constant_iterator(std::numeric_limits<size_type>::lowest());
   auto const out_of_bounds_end =
     out_of_bounds_begin + thrust::distance(scatter_map_begin, scatter_map_end);
-  thrust::scatter(rmm::exec_policy(stream),
+  thrust::scatter(rmm::exec_policy_nosync(stream),
                   out_of_bounds_begin,
                   out_of_bounds_end,
                   scatter_map_begin,
@@ -152,7 +152,7 @@ struct column_scatterer_impl<Element, std::enable_if_t<cudf::is_fixed_width<Elem
 
     // NOTE use source.begin + scatter rows rather than source.end in case the
     // scatter map is smaller than the number of source rows
-    thrust::scatter(rmm::exec_policy(stream),
+    thrust::scatter(rmm::exec_policy_nosync(stream),
                     source.begin<Element>(),
                     source.begin<Element>() + cudf::distance(scatter_map_begin, scatter_map_end),
                     scatter_map_begin,
@@ -226,7 +226,7 @@ struct column_scatterer_impl<dictionary32> {
     auto source_itr  = indexalator_factory::make_input_iterator(source_view.indices());
     auto new_indices = std::make_unique<column>(target_view.get_indices_annotated(), stream, mr);
     auto target_itr  = indexalator_factory::make_output_iterator(new_indices->mutable_view());
-    thrust::scatter(rmm::exec_policy(stream),
+    thrust::scatter(rmm::exec_policy_nosync(stream),
                     source_itr,
                     source_itr + std::distance(scatter_map_begin, scatter_map_end),
                     scatter_map_begin,
diff --git a/cpp/include/cudf/lists/detail/gather.cuh b/cpp/include/cudf/lists/detail/gather.cuh
index 7db908c5b52..48c0ed8f6e9 100644
--- a/cpp/include/cudf/lists/detail/gather.cuh
+++ b/cpp/include/cudf/lists/detail/gather.cuh
@@ -89,7 +89,7 @@ gather_data make_gather_data(cudf::lists_column_view const& source_column,
   // generate the compacted outgoing offsets.
   auto count_iter = thrust::make_counting_iterator<int32_t>(0);
   thrust::transform_exclusive_scan(
-    rmm::exec_policy(stream),
+    rmm::exec_policy_nosync(stream),
     count_iter,
     count_iter + offset_count,
     dst_offsets_v.begin<int32_t>(),
@@ -125,7 +125,7 @@ gather_data make_gather_data(cudf::lists_column_view const& source_column,
   // generate the base offsets
   rmm::device_uvector<int32_t> base_offsets = rmm::device_uvector<int32_t>(output_count, stream);
   thrust::transform(
-    rmm::exec_policy(stream),
+    rmm::exec_policy_nosync(stream),
     gather_map,
     gather_map + output_count,
     base_offsets.data(),
diff --git a/cpp/include/cudf/lists/detail/scatter.cuh b/cpp/include/cudf/lists/detail/scatter.cuh
index 5d89a9be29c..f4106fb5cdf 100644
--- a/cpp/include/cudf/lists/detail/scatter.cuh
+++ b/cpp/include/cudf/lists/detail/scatter.cuh
@@ -58,7 +58,7 @@ rmm::device_uvector<unbound_list_view> list_vector_from_column(
 
   auto vector = rmm::device_uvector<unbound_list_view>(n_rows, stream, mr);
 
-  thrust::transform(rmm::exec_policy(stream),
+  thrust::transform(rmm::exec_policy_nosync(stream),
                     index_begin,
                     index_end,
                     vector.begin(),
@@ -104,7 +104,7 @@ std::unique_ptr<column> scatter_impl(
   auto const child_column_type = lists_column_view(target).child().type();
 
   // Scatter.
-  thrust::scatter(rmm::exec_policy(stream),
+  thrust::scatter(rmm::exec_policy_nosync(stream),
                   source_vector.begin(),
                   source_vector.end(),
                   scatter_map_begin,
@@ -239,7 +239,7 @@ std::unique_ptr<column> scatter(
               : cudf::detail::create_null_mask(1, mask_state::ALL_NULL, stream, mr);
   auto offset_column = make_numeric_column(
     data_type{type_to_id<offset_type>()}, 2, mask_state::UNALLOCATED, stream, mr);
-  thrust::sequence(rmm::exec_policy(stream),
+  thrust::sequence(rmm::exec_policy_nosync(stream),
                    offset_column->mutable_view().begin<offset_type>(),
                    offset_column->mutable_view().end<offset_type>(),
                    0,
diff --git a/cpp/include/cudf/strings/detail/gather.cuh b/cpp/include/cudf/strings/detail/gather.cuh
index 4820e6e77c7..28b98eac3b5 100644
--- a/cpp/include/cudf/strings/detail/gather.cuh
+++ b/cpp/include/cudf/strings/detail/gather.cuh
@@ -305,7 +305,7 @@ std::unique_ptr<cudf::column> gather(strings_column_view const& strings,
   auto const d_in_offsets  = (strings_count > 0) ? strings.offsets_begin() : nullptr;
   auto const d_strings     = column_device_view::create(strings.parent(), stream);
   thrust::transform(
-    rmm::exec_policy(stream),
+    rmm::exec_policy_nosync(stream),
     begin,
     end,
     d_out_offsets,
@@ -317,7 +317,7 @@ std::unique_ptr<cudf::column> gather(strings_column_view const& strings,
 
   // check total size is not too large
   size_t const total_bytes = thrust::transform_reduce(
-    rmm::exec_policy(stream),
+    rmm::exec_policy_nosync(stream),
     d_out_offsets,
     d_out_offsets + output_count,
     [] __device__(auto size) { return static_cast<size_t>(size); },
@@ -327,8 +327,10 @@ std::unique_ptr<cudf::column> gather(strings_column_view const& strings,
                "total size of output strings is too large for a cudf column");
 
   // In-place convert output sizes into offsets
-  thrust::exclusive_scan(
-    rmm::exec_policy(stream), d_out_offsets, d_out_offsets + output_count + 1, d_out_offsets);
+  thrust::exclusive_scan(rmm::exec_policy_nosync(stream),
+                         d_out_offsets,
+                         d_out_offsets + output_count + 1,
+                         d_out_offsets);
 
   // build chars column
   cudf::device_span<int32_t const> const d_out_offsets_span(d_out_offsets, output_count + 1);
diff --git a/cpp/include/cudf/strings/detail/scatter.cuh b/cpp/include/cudf/strings/detail/scatter.cuh
index 7d6a07b4b10..55dd5bda260 100644
--- a/cpp/include/cudf/strings/detail/scatter.cuh
+++ b/cpp/include/cudf/strings/detail/scatter.cuh
@@ -76,7 +76,8 @@ std::unique_ptr<column> scatter(SourceIterator begin,
     begin, [] __device__(string_view const sv) { return sv.empty() ? string_view{} : sv; });
 
   // do the scatter
-  thrust::scatter(rmm::exec_policy(stream), itr, itr + size, scatter_map, target_vector.begin());
+  thrust::scatter(
+    rmm::exec_policy_nosync(stream), itr, itr + size, scatter_map, target_vector.begin());
 
   // build the output column
   auto sv_span = cudf::device_span<string_view const>(target_vector);
diff --git a/cpp/src/copying/scatter.cu b/cpp/src/copying/scatter.cu
index 4ebe465b945..6083a698560 100644
--- a/cpp/src/copying/scatter.cu
+++ b/cpp/src/copying/scatter.cu
@@ -119,7 +119,7 @@ struct column_scalar_scatterer_impl {
     auto scalar_iter =
       thrust::make_permutation_iterator(scalar_impl->data(), thrust::make_constant_iterator(0));
 
-    thrust::scatter(rmm::exec_policy(stream),
+    thrust::scatter(rmm::exec_policy_nosync(stream),
                     scalar_iter,
                     scalar_iter + scatter_rows,
                     scatter_iter,
@@ -191,8 +191,11 @@ struct column_scalar_scatterer_impl<dictionary32, MapIterator> {
     auto new_indices = std::make_unique<column>(dict_view.get_indices_annotated(), stream, mr);
     auto target_iter = indexalator_factory::make_output_iterator(new_indices->mutable_view());
 
-    thrust::scatter(
-      rmm::exec_policy(stream), scalar_iter, scalar_iter + scatter_rows, scatter_iter, target_iter);
+    thrust::scatter(rmm::exec_policy_nosync(stream),
+                    scalar_iter,
+                    scalar_iter + scatter_rows,
+                    scatter_iter,
+                    target_iter);
 
     // build the dictionary indices column from the result
     auto const indices_type = new_indices->type();
@@ -383,7 +386,7 @@ std::unique_ptr<column> boolean_mask_scatter(column_view const& input,
     data_type{type_id::INT32}, target.size(), mask_state::UNALLOCATED, stream);
   auto mutable_indices = indices->mutable_view();
 
-  thrust::sequence(rmm::exec_policy(stream),
+  thrust::sequence(rmm::exec_policy_nosync(stream),
                    mutable_indices.begin<size_type>(),
                    mutable_indices.end<size_type>(),
                    0);
diff --git a/cpp/src/lists/copying/scatter_helper.cu b/cpp/src/lists/copying/scatter_helper.cu
index cbb3aec76c5..ca7ca2f6590 100644
--- a/cpp/src/lists/copying/scatter_helper.cu
+++ b/cpp/src/lists/copying/scatter_helper.cu
@@ -185,7 +185,7 @@ struct list_child_constructor {
                                                       mr);
 
     thrust::transform(
-      rmm::exec_policy(stream),
+      rmm::exec_policy_nosync(stream),
       thrust::make_counting_iterator(0),
       thrust::make_counting_iterator(child_column->size()),
       child_column->mutable_view().begin<T>(),
@@ -237,7 +237,7 @@ struct list_child_constructor {
     auto const null_string_view = string_view{nullptr, 0};  // placeholder for factory function
 
     thrust::transform(
-      rmm::exec_policy(stream),
+      rmm::exec_policy_nosync(stream),
       thrust::make_counting_iterator<size_type>(0),
       thrust::make_counting_iterator<size_type>(string_views.size()),
       string_views.begin(),
@@ -304,7 +304,7 @@ struct list_child_constructor {
     // For instance, if a parent list_device_view has 3 elements, it should have 3 corresponding
     // child list_device_view instances.
     thrust::transform(
-      rmm::exec_policy(stream),
+      rmm::exec_policy_nosync(stream),
       thrust::make_counting_iterator<size_type>(0),
       thrust::make_counting_iterator<size_type>(child_list_views.size()),
       child_list_views.begin(),
diff --git a/cpp/tests/copying/detail_gather_tests.cu b/cpp/tests/copying/detail_gather_tests.cu
index a8abaa33ac3..bf2937ae8ab 100644
--- a/cpp/tests/copying/detail_gather_tests.cu
+++ b/cpp/tests/copying/detail_gather_tests.cu
@@ -48,7 +48,7 @@ TYPED_TEST(GatherTest, GatherDetailDeviceVectorTest)
   constexpr cudf::size_type source_size{1000};
   rmm::device_uvector<cudf::size_type> gather_map(source_size, cudf::get_default_stream());
   thrust::sequence(
-    rmm::exec_policy(cudf::get_default_stream()), gather_map.begin(), gather_map.end());
+    rmm::exec_policy_nosync(cudf::get_default_stream()), gather_map.begin(), gather_map.end());
 
   auto data = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i; });
   cudf::test::fixed_width_column_wrapper<TypeParam> source_column(data, data + source_size);

From b16b4ff59b1a2020a1680378db7f7aeb00eb2366 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 7 Nov 2022 17:16:27 -0800
Subject: [PATCH 137/202] Remove macros that inspect the contents of exceptions
 (#12076)

We should not be encouraging users to rely on specific error messages. Anywhere that is currently doing so is likely an indication that libcudf should be throwing a more specific type of exception instead of just a `cudf::logic_error`. This PR removes the testing utilities that were previously used for this purpose and reworks the relevant tests.

Related to #10200.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - David Wendt (https://github.com/davidwendt)
  - Bradley Dice (https://github.com/bdice)
  - Karthikeyan (https://github.com/karthikeyann)

URL: https://github.com/rapidsai/cudf/pull/12076
---
 .../developer_guide/DEVELOPER_GUIDE.md        |  10 +-
 cpp/include/cudf_test/cudf_gtest.hpp          |  52 --------
 cpp/tests/copying/get_value_tests.cpp         |   5 +-
 .../copying/segmented_gather_list_tests.cpp   |  27 +++--
 cpp/tests/error/error_handling_test.cu        |  21 +---
 cpp/tests/groupby/count_scan_tests.cpp        |  26 ++--
 cpp/tests/groupby/keys_tests.cpp              |   7 +-
 cpp/tests/groupby/rank_scan_tests.cpp         | 113 +++++++++---------
 cpp/tests/io/json_tree.cpp                    |   6 +-
 cpp/tests/io/nested_json_test.cpp             |   8 +-
 .../apply_boolean_mask_tests.cpp              |  10 +-
 cpp/tests/reductions/rank_tests.cpp           |  12 +-
 cpp/tests/sort/segmented_sort_tests.cpp       |  14 +--
 13 files changed, 124 insertions(+), 187 deletions(-)

diff --git a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md
index e49e270625b..3c085984a0e 100644
--- a/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md
+++ b/cpp/doxygen/developer_guide/DEVELOPER_GUIDE.md
@@ -403,6 +403,14 @@ Functions like merge or groupby in libcudf make no guarantees about the order of
 Promising deterministic ordering is not, in general, conducive to fast parallel algorithms.
 Calling code is responsible for performing sorts after the fact if sorted outputs are needed.
 
+## libcudf does not promise specific exception messages
+
+libcudf documents the exceptions that will be thrown by an API for different kinds of invalid inputs.
+The types of those exceptions (e.g. `cudf::logic_error`) are part of the public API.
+However, the explanatory string returned by the `what` method of those exceptions is not part of the API and is subject to change.
+Calling code should not rely on the contents of libcudf error messages to determine the nature of the error.
+For information on the types of exceptions that libcudf throws under different circumstances, see the [section on error handling](#errors).
+
 # libcudf API and Implementation
 
 ## Streams
@@ -837,7 +845,7 @@ description of what has broken from the past release. Label pull requests that c
 with the "non-breaking" tag.
 
 
-# Error Handling
+# Error Handling {#errors}
 
 libcudf follows conventions (and provides utilities) enforcing compile-time and run-time
 conditions and detecting and handling CUDA errors. Communication of errors is always via C++
diff --git a/cpp/include/cudf_test/cudf_gtest.hpp b/cpp/include/cudf_test/cudf_gtest.hpp
index fb2680545d3..ab45d90f2d2 100644
--- a/cpp/include/cudf_test/cudf_gtest.hpp
+++ b/cpp/include/cudf_test/cudf_gtest.hpp
@@ -110,58 +110,6 @@ struct TypeList<Types<TYPES...>> {
  */
 #define EXPECT_CUDA_SUCCEEDED(expr) EXPECT_EQ(cudaSuccess, expr)
 
-/**
- * @brief Utility for testing the expectation that an expression x throws the specified
- * exception whose what() message ends with the msg
- *
- * @param x The expression to test
- * @param exception The exception type to test for
- * @param startswith The start of the expected message
- * @param endswith The end of the expected message
- */
-#define EXPECT_THROW_MESSAGE(x, exception, startswith, endswith)    \
-  do {                                                              \
-    EXPECT_THROW(                                                   \
-      {                                                             \
-        try {                                                       \
-          x;                                                        \
-        } catch (const exception& e) {                              \
-          ASSERT_NE(nullptr, e.what());                             \
-          EXPECT_THAT(e.what(), testing::StartsWith((startswith))); \
-          EXPECT_THAT(e.what(), testing::EndsWith((endswith)));     \
-          throw;                                                    \
-        }                                                           \
-      },                                                            \
-      exception);                                                   \
-  } while (0)
-
-/**
- * @brief test macro to be expected to throw cudf::logic_error with a message
- *
- * @param x The statement to be tested
- * @param msg The message associated with the exception
- */
-#define CUDF_EXPECT_THROW_MESSAGE(x, msg) \
-  EXPECT_THROW_MESSAGE(x, cudf::logic_error, "cuDF failure at:", msg)
-
-/**
- * @brief test macro to be expected to throw cudf::cuda_error with a message
- *
- * @param x The statement to be tested
- * @param msg The message associated with the exception
- */
-#define CUDA_EXPECT_THROW_MESSAGE(x, msg) \
-  EXPECT_THROW_MESSAGE(x, cudf::cuda_error, "CUDA error encountered at:", msg)
-
-/**
- * @brief test macro to be expected to throw cudf::fatal_logic_error with a message
- *
- * @param x The statement to be tested
- * @param msg The message associated with the exception
- */
-#define FATAL_CUDA_EXPECT_THROW_MESSAGE(x, msg) \
-  EXPECT_THROW_MESSAGE(x, cudf::fatal_cuda_error, "Fatal CUDA error encountered at:", msg)
-
 /**
  * @brief test macro to be expected as no exception.
  *
diff --git a/cpp/tests/copying/get_value_tests.cpp b/cpp/tests/copying/get_value_tests.cpp
index 6d903cca020..2538cd9d851 100644
--- a/cpp/tests/copying/get_value_tests.cpp
+++ b/cpp/tests/copying/get_value_tests.cpp
@@ -81,8 +81,9 @@ TYPED_TEST(FixedWidthGetValueTest, IndexOutOfBounds)
 {
   fixed_width_column_wrapper<TypeParam, int32_t> col({9, 8, 7, 6}, {0, 1, 0, 1});
 
-  CUDF_EXPECT_THROW_MESSAGE(get_element(col, -1);, "Index out of bounds");
-  CUDF_EXPECT_THROW_MESSAGE(get_element(col, 4);, "Index out of bounds");
+  // Test for out of bounds indexes in both directions.
+  EXPECT_THROW(get_element(col, -1), cudf::logic_error);
+  EXPECT_THROW(get_element(col, 4), cudf::logic_error);
 }
 
 struct StringGetValueTest : public BaseFixture {
diff --git a/cpp/tests/copying/segmented_gather_list_tests.cpp b/cpp/tests/copying/segmented_gather_list_tests.cpp
index c05db05d57c..3ba7f668595 100644
--- a/cpp/tests/copying/segmented_gather_list_tests.cpp
+++ b/cpp/tests/copying/segmented_gather_list_tests.cpp
@@ -576,26 +576,31 @@ TEST_F(SegmentedGatherTestFloat, Fails)
   cudf::test::strings_column_wrapper nonlist_map1{"1", "2", "0", "1"};
   LCW<cudf::string_view> nonlist_map2{{"1", "2", "0", "1"}};
 
-  CUDF_EXPECT_THROW_MESSAGE(
+  // Input must be a list of integer indices. It should fail for integers,
+  // strings, or lists containing anything other than integers.
+  EXPECT_THROW(
     cudf::lists::detail::segmented_gather(lists_column_view{list}, lists_column_view{nonlist_map0}),
-    "lists_column_view only supports lists");
+    cudf::logic_error);
 
-  CUDF_EXPECT_THROW_MESSAGE(
+  EXPECT_THROW(
     cudf::lists::detail::segmented_gather(lists_column_view{list}, lists_column_view{nonlist_map1}),
-    "lists_column_view only supports lists");
+    cudf::logic_error);
 
-  CUDF_EXPECT_THROW_MESSAGE(
+  EXPECT_THROW(
     cudf::lists::detail::segmented_gather(lists_column_view{list}, lists_column_view{nonlist_map2}),
-    "Gather map should be list column of index type");
+    cudf::logic_error);
 
   auto valids =
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; });
   LCW<int8_t> nulls_map{{{3, 2, 1, 0}, {0}, {0}, {0, 1}}, valids};
-  CUDF_EXPECT_THROW_MESSAGE(
+
+  // Nulls are not supported in the gather map.
+  EXPECT_THROW(
     cudf::lists::detail::segmented_gather(lists_column_view{list}, lists_column_view{nulls_map}),
-    "Gather map contains nulls");
+    cudf::logic_error);
 
-  CUDF_EXPECT_THROW_MESSAGE(cudf::lists::detail::segmented_gather(
-                              lists_column_view{list}, lists_column_view{size_mismatch_map}),
-                            "Gather map and list column should be same size");
+  // Gather map and list column sizes must be the same.
+  EXPECT_THROW(cudf::lists::detail::segmented_gather(lists_column_view{list},
+                                                     lists_column_view{size_mismatch_map}),
+               cudf::logic_error);
 }
diff --git a/cpp/tests/error/error_handling_test.cu b/cpp/tests/error/error_handling_test.cu
index e83d961cd9b..eb4a3e895f9 100644
--- a/cpp/tests/error/error_handling_test.cu
+++ b/cpp/tests/error/error_handling_test.cu
@@ -29,25 +29,10 @@ TEST(ExpectsTest, FalseCondition)
 
 TEST(ExpectsTest, TrueCondition) { EXPECT_NO_THROW(CUDF_EXPECTS(true, "condition is true")); }
 
-TEST(ExpectsTest, TryCatch)
-{
-  CUDF_EXPECT_THROW_MESSAGE(CUDF_EXPECTS(false, "test reason"), "test reason");
-}
-
-TEST(CudaTryTest, Error)
-{
-  CUDA_EXPECT_THROW_MESSAGE(CUDF_CUDA_TRY(cudaErrorLaunchFailure),
-                            "cudaErrorLaunchFailure unspecified launch failure");
-}
+TEST(CudaTryTest, Error) { EXPECT_THROW(CUDF_CUDA_TRY(cudaErrorLaunchFailure), cudf::cuda_error); }
 
 TEST(CudaTryTest, Success) { EXPECT_NO_THROW(CUDF_CUDA_TRY(cudaSuccess)); }
 
-TEST(CudaTryTest, TryCatch)
-{
-  CUDA_EXPECT_THROW_MESSAGE(CUDF_CUDA_TRY(cudaErrorMemoryAllocation),
-                            "cudaErrorMemoryAllocation out of memory");
-}
-
 TEST(StreamCheck, success) { EXPECT_NO_THROW(CUDF_CHECK_CUDA(0)); }
 
 namespace {
@@ -79,9 +64,7 @@ TEST(StreamCheck, CatchFailedKernel)
 #ifndef NDEBUG
   stream.synchronize();
 #endif
-  CUDA_EXPECT_THROW_MESSAGE(CUDF_CHECK_CUDA(stream.value()),
-                            "cudaErrorInvalidConfiguration "
-                            "invalid configuration argument");
+  EXPECT_THROW(CUDF_CHECK_CUDA(stream.value()), cudf::cuda_error);
 }
 
 __global__ void kernel() { asm("trap;"); }
diff --git a/cpp/tests/groupby/count_scan_tests.cpp b/cpp/tests/groupby/count_scan_tests.cpp
index 164e967e28e..54df690d307 100644
--- a/cpp/tests/groupby/count_scan_tests.cpp
+++ b/cpp/tests/groupby/count_scan_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -53,9 +53,10 @@ TYPED_TEST(groupby_count_scan_test, basic)
   result_wrapper expect_vals{0, 1, 2, 0, 1, 2, 3, 0, 1, 2};
   // clang-format on
 
+  // Count groupby aggregation is only supported with null_policy::EXCLUDE
   auto agg1 = cudf::make_count_aggregation<groupby_scan_aggregation>();
-  CUDF_EXPECT_THROW_MESSAGE(test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg1)),
-                            "Unsupported groupby scan aggregation");
+  EXPECT_THROW(test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg1)),
+               cudf::logic_error);
 
   auto agg2 = cudf::make_count_aggregation<groupby_scan_aggregation>(null_policy::INCLUDE);
   test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg2));
@@ -181,13 +182,13 @@ TYPED_TEST(FixedPointTestAllReps, GroupByCountScan)
   auto const expect_vals = result_wrapper{0, 1, 2, 0, 1, 2, 3, 0, 1, 2};
   // clang-format on
 
-  CUDF_EXPECT_THROW_MESSAGE(
-    test_single_scan(keys,
-                     vals,
-                     expect_keys,
-                     expect_vals,
-                     cudf::make_count_aggregation<groupby_scan_aggregation>()),
-    "Unsupported groupby scan aggregation");
+  // Count groupby aggregation is only supported with null_policy::EXCLUDE
+  EXPECT_THROW(test_single_scan(keys,
+                                vals,
+                                expect_keys,
+                                expect_vals,
+                                cudf::make_count_aggregation<groupby_scan_aggregation>()),
+               cudf::logic_error);
 
   auto agg2 = cudf::make_count_aggregation<groupby_scan_aggregation>(null_policy::INCLUDE);
   test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg2));
@@ -209,9 +210,10 @@ TEST_F(groupby_dictionary_count_scan_test, basic)
   result_wrapper expect_vals{0, 0, 0, 1, 0, 1};
   // clang-format on
 
+  // Count groupby aggregation is only supported with null_policy::EXCLUDE
   auto agg1 = cudf::make_count_aggregation<groupby_scan_aggregation>();
-  CUDF_EXPECT_THROW_MESSAGE(test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg1)),
-                            "Unsupported groupby scan aggregation");
+  EXPECT_THROW(test_single_scan(keys, vals, expect_keys, expect_vals, std::move(agg1)),
+               cudf::logic_error);
   test_single_scan(keys,
                    vals,
                    expect_keys,
diff --git a/cpp/tests/groupby/keys_tests.cpp b/cpp/tests/groupby/keys_tests.cpp
index 19e82c4ffd1..d2f2f233953 100644
--- a/cpp/tests/groupby/keys_tests.cpp
+++ b/cpp/tests/groupby/keys_tests.cpp
@@ -234,12 +234,11 @@ TYPED_TEST(groupby_keys_test, mismatch_num_rows)
   fixed_width_column_wrapper<K> keys{1, 2, 3};
   fixed_width_column_wrapper<V> vals{0, 1, 2, 3, 4};
 
+  // Verify that scan throws an error when given data of mismatched sizes.
   auto agg = cudf::make_count_aggregation<groupby_aggregation>();
-  CUDF_EXPECT_THROW_MESSAGE(test_single_agg(keys, vals, keys, vals, std::move(agg)),
-                            "Size mismatch between request values and groupby keys.");
+  EXPECT_THROW(test_single_agg(keys, vals, keys, vals, std::move(agg)), cudf::logic_error);
   auto agg2 = cudf::make_count_aggregation<groupby_scan_aggregation>();
-  CUDF_EXPECT_THROW_MESSAGE(test_single_scan(keys, vals, keys, vals, std::move(agg2)),
-                            "Size mismatch between request values and groupby keys.");
+  EXPECT_THROW(test_single_scan(keys, vals, keys, vals, std::move(agg2)), cudf::logic_error);
 }
 
 template <typename T>
diff --git a/cpp/tests/groupby/rank_scan_tests.cpp b/cpp/tests/groupby/rank_scan_tests.cpp
index d4e8b4cbf0f..c9f31576aff 100644
--- a/cpp/tests/groupby/rank_scan_tests.cpp
+++ b/cpp/tests/groupby/rank_scan_tests.cpp
@@ -508,65 +508,60 @@ TEST_F(groupby_rank_scan_test_failures, DISABLED_test_exception_triggers)
   auto const keys = input<T>{{1, 2, 3}, null_at(2)};
   auto const col  = input<T>{3, 3, 1};
 
-  CUDF_EXPECT_THROW_MESSAGE(
-    test_single_scan(keys,
-                     col,
-                     keys,
-                     col,
-                     make_rank_aggregation<groupby_scan_aggregation>(rank_method::DENSE),
-                     null_policy::INCLUDE,
-                     sorted::NO),
-    "Rank aggregate in groupby scan requires the keys to be presorted");
-
-  CUDF_EXPECT_THROW_MESSAGE(
-    test_single_scan(keys,
-                     col,
-                     keys,
-                     col,
-                     make_rank_aggregation<groupby_scan_aggregation>(rank_method::MIN),
-                     null_policy::INCLUDE,
-                     sorted::NO),
-    "Rank aggregate in groupby scan requires the keys to be presorted");
-
-  CUDF_EXPECT_THROW_MESSAGE(
-    test_single_scan(keys,
-                     col,
-                     keys,
-                     col,
-                     make_rank_aggregation<groupby_scan_aggregation>(rank_method::DENSE),
-                     null_policy::EXCLUDE,
-                     sorted::YES),
-    "Rank aggregate in groupby scan requires the keys to be presorted");
-
-  CUDF_EXPECT_THROW_MESSAGE(
-    test_single_scan(keys,
-                     col,
-                     keys,
-                     col,
-                     make_rank_aggregation<groupby_scan_aggregation>(rank_method::MIN),
-                     null_policy::EXCLUDE,
-                     sorted::YES),
-    "Rank aggregate in groupby scan requires the keys to be presorted");
-
-  CUDF_EXPECT_THROW_MESSAGE(
-    test_single_scan(keys,
-                     col,
-                     keys,
-                     col,
-                     make_rank_aggregation<groupby_scan_aggregation>(rank_method::DENSE),
-                     null_policy::EXCLUDE,
-                     sorted::NO),
-    "Rank aggregate in groupby scan requires the keys to be presorted");
-
-  CUDF_EXPECT_THROW_MESSAGE(
-    test_single_scan(keys,
-                     col,
-                     keys,
-                     col,
-                     make_rank_aggregation<groupby_scan_aggregation>(rank_method::MIN),
-                     null_policy::EXCLUDE,
-                     sorted::NO),
-    "Rank aggregate in groupby scan requires the keys to be presorted");
+  // All of these aggregations raise exceptions unless provided presorted keys
+  EXPECT_THROW(test_single_scan(keys,
+                                col,
+                                keys,
+                                col,
+                                make_rank_aggregation<groupby_scan_aggregation>(rank_method::DENSE),
+                                null_policy::INCLUDE,
+                                sorted::NO),
+               cudf::logic_error);
+
+  EXPECT_THROW(test_single_scan(keys,
+                                col,
+                                keys,
+                                col,
+                                make_rank_aggregation<groupby_scan_aggregation>(rank_method::MIN),
+                                null_policy::INCLUDE,
+                                sorted::NO),
+               cudf::logic_error);
+
+  EXPECT_THROW(test_single_scan(keys,
+                                col,
+                                keys,
+                                col,
+                                make_rank_aggregation<groupby_scan_aggregation>(rank_method::DENSE),
+                                null_policy::EXCLUDE,
+                                sorted::YES),
+               cudf::logic_error);
+
+  EXPECT_THROW(test_single_scan(keys,
+                                col,
+                                keys,
+                                col,
+                                make_rank_aggregation<groupby_scan_aggregation>(rank_method::MIN),
+                                null_policy::EXCLUDE,
+                                sorted::YES),
+               cudf::logic_error);
+
+  EXPECT_THROW(test_single_scan(keys,
+                                col,
+                                keys,
+                                col,
+                                make_rank_aggregation<groupby_scan_aggregation>(rank_method::DENSE),
+                                null_policy::EXCLUDE,
+                                sorted::NO),
+               cudf::logic_error);
+
+  EXPECT_THROW(test_single_scan(keys,
+                                col,
+                                keys,
+                                col,
+                                make_rank_aggregation<groupby_scan_aggregation>(rank_method::MIN),
+                                null_policy::EXCLUDE,
+                                sorted::NO),
+               cudf::logic_error);
 }
 
 }  // namespace test
diff --git a/cpp/tests/io/json_tree.cpp b/cpp/tests/io/json_tree.cpp
index 59942eecc1d..d3cd1dd9490 100644
--- a/cpp/tests/io/json_tree.cpp
+++ b/cpp/tests/io/json_tree.cpp
@@ -750,9 +750,9 @@ TEST_F(JsonTest, TreeRepresentationError)
     cudf::io::json::detail::get_token_stream(d_input, options, stream);
 
   // Get the JSON's tree representation
-  CUDF_EXPECT_THROW_MESSAGE(
-    cuio_json::detail::get_tree_representation(tokens_gpu, token_indices_gpu, stream),
-    "JSON Parser encountered an invalid format at location 6");
+  // This JSON is invalid and will raise an exception.
+  EXPECT_THROW(cuio_json::detail::get_tree_representation(tokens_gpu, token_indices_gpu, stream),
+               cudf::logic_error);
 }
 
 /**
diff --git a/cpp/tests/io/nested_json_test.cpp b/cpp/tests/io/nested_json_test.cpp
index 01a1f0647cc..5a556f35501 100644
--- a/cpp/tests/io/nested_json_test.cpp
+++ b/cpp/tests/io/nested_json_test.cpp
@@ -588,11 +588,11 @@ TEST_P(JsonParserTest, ExpectFailMixStructAndList)
     R"( [{"a":[123, {"0": 123}], "b":1.0}, {"b":1.1}, {"b":2.1}] )",
     R"( [{"a":[123, "123"], "b":1.0}, {"b":1.1}, {"b":2.1}] )"};
 
+  // libcudf does not currently support a mix of lists and structs.
   for (auto const& input : inputs_fail) {
-    CUDF_EXPECT_THROW_MESSAGE(
-      auto const cudf_table = json_parser(
-        cudf::host_span<SymbolT const>{input.data(), input.size()}, options, stream, mr),
-      "A mix of lists and structs within the same column is not supported");
+    EXPECT_THROW(auto const cudf_table = json_parser(
+                   cudf::host_span<SymbolT const>{input.data(), input.size()}, options, stream, mr),
+                 cudf::logic_error);
   }
 
   for (auto const& input : inputs_succeed) {
diff --git a/cpp/tests/lists/stream_compaction/apply_boolean_mask_tests.cpp b/cpp/tests/lists/stream_compaction/apply_boolean_mask_tests.cpp
index a5b036210ba..17265326fde 100644
--- a/cpp/tests/lists/stream_compaction/apply_boolean_mask_tests.cpp
+++ b/cpp/tests/lists/stream_compaction/apply_boolean_mask_tests.cpp
@@ -217,17 +217,15 @@ TEST_F(ApplyBooleanMaskTest, Failure)
     // Invalid mask type.
     auto const input  = lists<int32_t>{{1, 2, 3}, {4, 5, 6}};
     auto const filter = lists<int32_t>{{0, 0, 0}};
-    CUDF_EXPECT_THROW_MESSAGE(
-      apply_boolean_mask(lists_column_view{input}, lists_column_view{filter}),
-      "Mask must be of type BOOL8.");
+    EXPECT_THROW(apply_boolean_mask(lists_column_view{input}, lists_column_view{filter}),
+                 cudf::logic_error);
   }
   {
     // Mismatched number of rows.
     auto const input  = lists<int32_t>{{1, 2, 3}, {4, 5, 6}};
     auto const filter = filter_t{{0, 0, 0}};
-    CUDF_EXPECT_THROW_MESSAGE(
-      apply_boolean_mask(lists_column_view{input}, lists_column_view{filter}),
-      "Boolean masks column must have same number of rows as input.");
+    EXPECT_THROW(apply_boolean_mask(lists_column_view{input}, lists_column_view{filter}),
+                 cudf::logic_error);
   }
 }
 }  // namespace cudf::test
diff --git a/cpp/tests/reductions/rank_tests.cpp b/cpp/tests/reductions/rank_tests.cpp
index 73b721d5d85..a8e75aeb7e5 100644
--- a/cpp/tests/reductions/rank_tests.cpp
+++ b/cpp/tests/reductions/rank_tests.cpp
@@ -316,12 +316,12 @@ TEST(RankScanTest, ExclusiveScan)
 {
   auto const vals = input<uint32_t>{3, 4, 5};
 
-  CUDF_EXPECT_THROW_MESSAGE(cudf::scan(vals, *dense_rank, scan_type::EXCLUSIVE, INCLUDE_NULLS),
-                            "Rank aggregation operator requires an inclusive scan");
-  CUDF_EXPECT_THROW_MESSAGE(cudf::scan(vals, *rank, scan_type::EXCLUSIVE, INCLUDE_NULLS),
-                            "Rank aggregation operator requires an inclusive scan");
-  CUDF_EXPECT_THROW_MESSAGE(cudf::scan(vals, *percent_rank, scan_type::EXCLUSIVE, INCLUDE_NULLS),
-                            "Rank aggregation operator requires an inclusive scan");
+  // Only inclusive scans are supported, so these should all raise exceptions.
+  EXPECT_THROW(cudf::scan(vals, *dense_rank, scan_type::EXCLUSIVE, INCLUDE_NULLS),
+               cudf::logic_error);
+  EXPECT_THROW(cudf::scan(vals, *rank, scan_type::EXCLUSIVE, INCLUDE_NULLS), cudf::logic_error);
+  EXPECT_THROW(cudf::scan(vals, *percent_rank, scan_type::EXCLUSIVE, INCLUDE_NULLS),
+               cudf::logic_error);
 }
 
 }  // namespace cudf::test
diff --git a/cpp/tests/sort/segmented_sort_tests.cpp b/cpp/tests/sort/segmented_sort_tests.cpp
index 53642a89b3d..09007df38ce 100644
--- a/cpp/tests/sort/segmented_sort_tests.cpp
+++ b/cpp/tests/sort/segmented_sort_tests.cpp
@@ -80,14 +80,12 @@ TEST_F(SegmentedSortInt, Empty)
   CUDF_EXPECT_NO_THROW(cudf::segmented_sort_by_key(table_empty, table_empty, segments));
   CUDF_EXPECT_NO_THROW(cudf::segmented_sort_by_key(table_empty, table_empty, col_empty));
 
-  CUDF_EXPECT_THROW_MESSAGE(cudf::segmented_sort_by_key(table_empty, table_valid, segments),
-                            "Mismatch in number of rows for values and keys");
-  CUDF_EXPECT_THROW_MESSAGE(cudf::segmented_sort_by_key(table_empty, table_valid, col_empty),
-                            "Mismatch in number of rows for values and keys");
-  CUDF_EXPECT_THROW_MESSAGE(cudf::segmented_sort_by_key(table_valid, table_empty, segments),
-                            "Mismatch in number of rows for values and keys");
-  CUDF_EXPECT_THROW_MESSAGE(cudf::segmented_sort_by_key(table_valid, table_empty, col_empty),
-                            "Mismatch in number of rows for values and keys");
+  // Swapping "empty" and "valid" tables is invalid because the keys and values will be of different
+  // sizes.
+  EXPECT_THROW(cudf::segmented_sort_by_key(table_empty, table_valid, segments), cudf::logic_error);
+  EXPECT_THROW(cudf::segmented_sort_by_key(table_empty, table_valid, col_empty), cudf::logic_error);
+  EXPECT_THROW(cudf::segmented_sort_by_key(table_valid, table_empty, segments), cudf::logic_error);
+  EXPECT_THROW(cudf::segmented_sort_by_key(table_valid, table_empty, col_empty), cudf::logic_error);
 }
 
 TEST_F(SegmentedSortInt, Single)

From 35077f5140225c74449d4cd3927b55f768e4cdf8 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Mon, 7 Nov 2022 21:43:39 -0600
Subject: [PATCH 138/202] Enable returning string data from UDFs used through
 `apply` (#11933)

This PR introduces the ability to return a string from a UDF used through `DataFrame.apply` or `Series.apply`. It provides all of the plumbing needed to run the function `lambda st: st`, but does not provide any APIs that return strings such as `strip` or `upper` - these will be added in a series of followups. A cast from `string_view` to `udf_string` is provided that numba will call when attempting to return a `string_view` into a `udf_string` array.

Authors:
  - https://github.com/brandon-b-miller
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/11933
---
 python/cudf/cudf/core/udf/__init__.py         | 39 ++++++++--------
 python/cudf/cudf/core/udf/masked_lowering.py  |  9 ++--
 python/cudf/cudf/core/udf/masked_typing.py    |  2 +-
 python/cudf/cudf/core/udf/utils.py            | 31 +++++++++----
 python/cudf/cudf/tests/test_udf_masked_ops.py |  8 ++++
 .../strings_udf/cpp/src/strings/udf/shim.cu   | 12 +++++
 .../strings_udf/_lib/cpp/strings_udf.pxd      | 10 ++++
 .../strings_udf/_lib/cudf_jit_udf.pyx         | 21 ++++++++-
 python/strings_udf/strings_udf/_typing.py     | 34 ++++++++++----
 python/strings_udf/strings_udf/lowering.py    | 40 ++++++++++++----
 .../strings_udf/tests/test_string_udfs.py     | 46 ++++++++++++++-----
 11 files changed, 187 insertions(+), 65 deletions(-)

diff --git a/python/cudf/cudf/core/udf/__init__.py b/python/cudf/cudf/core/udf/__init__.py
index 4730f1fa296..926d2ea6cbf 100644
--- a/python/cudf/cudf/core/udf/__init__.py
+++ b/python/cudf/cudf/core/udf/__init__.py
@@ -1,10 +1,7 @@
 # Copyright (c) 2022, NVIDIA CORPORATION.
-import numpy as np
-from numba import cuda, types
-from numba.cuda.cudaimpl import (
-    lower as cuda_lower,
-    registry as cuda_lowering_registry,
-)
+
+from numba import types
+from numba.cuda.cudaimpl import lower as cuda_lower
 
 from cudf.core.dtypes import dtype
 from cudf.core.udf import api, row_function, utils
@@ -23,7 +20,7 @@
     | {types.boolean}
 )
 _STRING_UDFS_ENABLED = False
-
+cudf_str_dtype = dtype(str)
 try:
     import strings_udf
     from strings_udf import ptxpath
@@ -31,32 +28,32 @@
     if ptxpath:
         utils.ptx_files.append(ptxpath)
 
-        from strings_udf._lib.cudf_jit_udf import to_string_view_array
+        from strings_udf._lib.cudf_jit_udf import (
+            column_from_udf_string_array,
+            column_to_string_view_array,
+        )
         from strings_udf._typing import str_view_arg_handler, string_view
 
         from . import strings_typing  # isort: skip
         from . import strings_lowering  # isort: skip
 
-        cuda_lower(api.Masked, strings_typing.string_view, types.boolean)(
+        cuda_lower(api.Masked, string_view, types.boolean)(
             masked_lowering.masked_constructor
         )
-
-        # add an overload of pack_return(string_view)
-        cuda_lower(api.pack_return, strings_typing.string_view)(
-            masked_lowering.pack_return_scalar_impl
-        )
-
-        _supported_masked_types |= {strings_typing.string_view}
-        utils.launch_arg_getters[dtype("O")] = to_string_view_array
-        utils.masked_array_types[dtype("O")] = string_view
         utils.JIT_SUPPORTED_TYPES |= STRING_TYPES
+        _supported_masked_types |= {string_view}
 
-        utils.arg_handlers.append(str_view_arg_handler)
-        row_function.itemsizes[dtype("O")] = string_view.size_bytes
+        utils.launch_arg_getters[cudf_str_dtype] = column_to_string_view_array
+        utils.output_col_getters[cudf_str_dtype] = column_from_udf_string_array
+        utils.masked_array_types[cudf_str_dtype] = string_view
+        row_function.itemsizes[cudf_str_dtype] = string_view.size_bytes
 
+        utils.arg_handlers.append(str_view_arg_handler)
         _STRING_UDFS_ENABLED = True
 
 except ImportError as e:
     # allow cuDF to work without strings_udf
     pass
-masked_typing.register_masked_constructor(_supported_masked_types)
+
+masked_typing._register_masked_constructor_typing(_supported_masked_types)
+masked_lowering._register_masked_constructor_lowering(_supported_masked_types)
diff --git a/python/cudf/cudf/core/udf/masked_lowering.py b/python/cudf/cudf/core/udf/masked_lowering.py
index f825b6538bf..37f3117e756 100644
--- a/python/cudf/cudf/core/udf/masked_lowering.py
+++ b/python/cudf/cudf/core/udf/masked_lowering.py
@@ -372,10 +372,6 @@ def cast_masked_to_masked(context, builder, fromty, toty, val):
 
 
 # Masked constructor for use in a kernel for testing
-@lower_builtin(api.Masked, types.Boolean, types.boolean)
-@lower_builtin(api.Masked, types.Number, types.boolean)
-@lower_builtin(api.Masked, types.NPDatetime, types.boolean)
-@lower_builtin(api.Masked, types.NPTimedelta, types.boolean)
 def masked_constructor(context, builder, sig, args):
     ty = sig.return_type
     value, valid = args
@@ -385,6 +381,11 @@ def masked_constructor(context, builder, sig, args):
     return masked._getvalue()
 
 
+def _register_masked_constructor_lowering(supported_masked_types):
+    for ty in supported_masked_types:
+        lower_builtin(api.Masked, ty, types.boolean)(masked_constructor)
+
+
 # Allows us to make an instance of MaskedType a global variable
 # and properly use it inside functions we will later compile
 @cuda_lowering_registry.lower_constant(MaskedType)
diff --git a/python/cudf/cudf/core/udf/masked_typing.py b/python/cudf/cudf/core/udf/masked_typing.py
index a815a9f6dae..7baf2d585e2 100644
--- a/python/cudf/cudf/core/udf/masked_typing.py
+++ b/python/cudf/cudf/core/udf/masked_typing.py
@@ -169,7 +169,7 @@ def typeof_masked(val, c):
 
 # Implemented typing for Masked(value, valid) - the construction of a Masked
 # type in a kernel.
-def register_masked_constructor(supported_masked_types):
+def _register_masked_constructor_typing(supported_masked_types):
     class MaskedConstructor(ConcreteTemplate):
         key = api.Masked
         cases = [
diff --git a/python/cudf/cudf/core/udf/utils.py b/python/cudf/cudf/core/udf/utils.py
index fa79088046c..4d40d41f9c3 100644
--- a/python/cudf/cudf/core/udf/utils.py
+++ b/python/cudf/cudf/core/udf/utils.py
@@ -10,6 +10,8 @@
 from numba.np import numpy_support
 from numba.types import CPointer, Poison, Tuple, boolean, int64, void
 
+import rmm
+
 from cudf.core.column.column import as_column
 from cudf.core.dtypes import CategoricalDtype
 from cudf.core.udf.masked_typing import MaskedType
@@ -31,6 +33,9 @@
 precompiled: cachetools.LRUCache = cachetools.LRUCache(maxsize=32)
 arg_handlers: List[Any] = []
 ptx_files: List[Any] = []
+masked_array_types: Dict[Any, Any] = {}
+launch_arg_getters: Dict[Any, Any] = {}
+output_col_getters: Dict[Any, Any] = {}
 
 
 @_cudf_nvtx_annotate
@@ -54,6 +59,7 @@ def _get_udf_return_type(argty, func: Callable, args=()):
     # Get the return type. The PTX is also returned by compile_udf, but is not
     # needed here.
     ptx, output_type = cudautils.compile_udf(func, compile_sig)
+
     if not isinstance(output_type, MaskedType):
         numba_output_type = numpy_support.from_dtype(np.dtype(output_type))
     else:
@@ -64,6 +70,7 @@ def _get_udf_return_type(argty, func: Callable, args=()):
         if not isinstance(numba_output_type, MaskedType)
         else numba_output_type.value_type
     )
+    result = result if result.is_internal else result.return_type
 
     # _get_udf_return_type will throw a TypingError if the user tries to use
     # a field in the row containing an unsupported dtype, except in the
@@ -112,9 +119,6 @@ def _supported_cols_from_frame(frame):
     }
 
 
-masked_array_types: Dict[Any, Any] = {}
-
-
 def _masked_array_type_from_col(col):
     """
     Return a type representing a tuple of arrays,
@@ -142,9 +146,12 @@ def _construct_signature(frame, return_type, args):
     actually JIT the kernel itself later, accounting for types
     and offsets. Skips columns with unsupported dtypes.
     """
-
+    if not return_type.is_internal:
+        return_type = CPointer(return_type)
+    else:
+        return_type = return_type[::1]
     # Tuple of arrays, first the output data array, then the mask
-    return_type = Tuple((return_type[::1], boolean[::1]))
+    return_type = Tuple((return_type, boolean[::1]))
     offsets = []
     sig = [return_type, int64]
     for col in _supported_cols_from_frame(frame).values():
@@ -213,7 +220,12 @@ def _compile_or_get(frame, func, args, kernel_getter=None):
     # could be a MaskedType or a scalar type.
 
     kernel, scalar_return_type = kernel_getter(frame, func, args)
-    np_return_type = numpy_support.as_dtype(scalar_return_type)
+    np_return_type = (
+        numpy_support.as_dtype(scalar_return_type)
+        if scalar_return_type.is_internal
+        else scalar_return_type.np_dtype
+    )
+
     precompiled[cache_key] = (kernel, np_return_type)
 
     return kernel, np_return_type
@@ -230,9 +242,6 @@ def _get_kernel(kernel_string, globals_, sig, func):
     return kernel
 
 
-launch_arg_getters: Dict[Any, Any] = {}
-
-
 def _get_input_args_from_frame(fr):
     args = []
     offsets = []
@@ -254,8 +263,12 @@ def _get_input_args_from_frame(fr):
 
 
 def _return_arr_from_dtype(dt, size):
+    if extensionty := masked_array_types.get(dt):
+        return rmm.DeviceBuffer(size=size * extensionty.return_type.size_bytes)
     return cp.empty(size, dtype=dt)
 
 
 def _post_process_output_col(col, retty):
+    if getter := output_col_getters.get(retty):
+        col = getter(col)
     return as_column(col, retty)
diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py
index f1d110ba168..b4c7cef3a4c 100644
--- a/python/cudf/cudf/tests/test_udf_masked_ops.py
+++ b/python/cudf/cudf/tests/test_udf_masked_ops.py
@@ -868,6 +868,14 @@ def func(row):
     run_masked_udf_test(func, str_udf_data, check_dtype=False)
 
 
+@string_udf_test
+def test_string_udf_return_string(str_udf_data):
+    def func(row):
+        return row["str_col"]
+
+    run_masked_udf_test(func, str_udf_data, check_dtype=False)
+
+
 @pytest.mark.parametrize(
     "data", [[1.0, 0.0, 1.5], [1, 0, 2], [True, False, True]]
 )
diff --git a/python/strings_udf/cpp/src/strings/udf/shim.cu b/python/strings_udf/cpp/src/strings/udf/shim.cu
index 4d6690468ff..b284d58fe58 100644
--- a/python/strings_udf/cpp/src/strings/udf/shim.cu
+++ b/python/strings_udf/cpp/src/strings/udf/shim.cu
@@ -17,6 +17,7 @@
 #include <cudf/strings/udf/char_types.cuh>
 #include <cudf/strings/udf/search.cuh>
 #include <cudf/strings/udf/starts_with.cuh>
+#include <cudf/strings/udf/udf_string.cuh>
 
 using namespace cudf::strings::udf;
 
@@ -215,3 +216,14 @@ extern "C" __device__ int pycount(int* nb_retval, void const* str, void const* s
   *nb_retval = count(*str_view, *substr_view);
   return 0;
 }
+
+extern "C" __device__ int udf_string_from_string_view(int* nb_retbal,
+                                                      void const* str,
+                                                      void* udf_str)
+{
+  auto str_view_ptr = reinterpret_cast<cudf::string_view const*>(str);
+  auto udf_str_ptr  = reinterpret_cast<udf_string*>(udf_str);
+  *udf_str_ptr      = udf_string(*str_view_ptr);
+
+  return 0;
+}
diff --git a/python/strings_udf/strings_udf/_lib/cpp/strings_udf.pxd b/python/strings_udf/strings_udf/_lib/cpp/strings_udf.pxd
index fb8e3a949bf..7b90760abcc 100644
--- a/python/strings_udf/strings_udf/_lib/cpp/strings_udf.pxd
+++ b/python/strings_udf/strings_udf/_lib/cpp/strings_udf.pxd
@@ -11,9 +11,19 @@ from cudf._lib.cpp.types cimport size_type
 from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer
 
 
+cdef extern from "cudf/strings/udf/udf_string.hpp" namespace \
+        "cudf::strings::udf" nogil:
+    cdef cppclass udf_string
+
 cdef extern from "cudf/strings/udf/udf_apis.hpp"  namespace \
         "cudf::strings::udf" nogil:
     cdef unique_ptr[device_buffer] to_string_view_array(column_view) except +
+    cdef unique_ptr[column] column_from_udf_string_array(
+        udf_string* strings, size_type size,
+    ) except +
+    cdef void free_udf_string_array(
+        udf_string* strings, size_type size
+    ) except +
 
 cdef extern from "cudf/strings/detail/char_tables.hpp" namespace \
         "cudf::strings::detail" nogil:
diff --git a/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx b/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx
index db6e206843c..4fc9e473fa3 100644
--- a/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx
+++ b/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx
@@ -6,15 +6,18 @@ from libcpp.utility cimport move
 from cudf.core.buffer import as_buffer
 
 from cudf._lib.column cimport Column
-from cudf._lib.cpp.column.column cimport column_view
+from cudf._lib.cpp.column.column cimport column, column_view
 from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer
 
 from strings_udf._lib.cpp.strings_udf cimport (
+    column_from_udf_string_array as cpp_column_from_udf_string_array,
+    free_udf_string_array as cpp_free_udf_string_array,
     to_string_view_array as cpp_to_string_view_array,
+    udf_string,
 )
 
 
-def to_string_view_array(Column strings_col):
+def column_to_string_view_array(Column strings_col):
     cdef unique_ptr[device_buffer] c_buffer
     cdef column_view input_view = strings_col.view()
     with nogil:
@@ -22,3 +25,17 @@ def to_string_view_array(Column strings_col):
 
     device_buffer = DeviceBuffer.c_from_unique_ptr(move(c_buffer))
     return as_buffer(device_buffer)
+
+
+def column_from_udf_string_array(DeviceBuffer d_buffer):
+    cdef size_t size = int(d_buffer.c_size() / sizeof(udf_string))
+    cdef udf_string* data = <udf_string*>d_buffer.c_data()
+    cdef unique_ptr[column] c_result
+
+    with nogil:
+        c_result = move(cpp_column_from_udf_string_array(data, size))
+        cpp_free_udf_string_array(data, size)
+
+    result = Column.from_unique_ptr(move(c_result))
+
+    return result
diff --git a/python/strings_udf/strings_udf/_typing.py b/python/strings_udf/strings_udf/_typing.py
index 675507bccde..320958960cd 100644
--- a/python/strings_udf/strings_udf/_typing.py
+++ b/python/strings_udf/strings_udf/_typing.py
@@ -3,6 +3,7 @@
 import operator
 
 import llvmlite.binding as ll
+import numpy as np
 from numba import types
 from numba.core.datamodel import default_manager
 from numba.core.extending import models, register_model
@@ -23,19 +24,33 @@
 
 
 # String object definitions
-class DString(types.Type):
+class UDFString(types.Type):
+
+    np_dtype = np.dtype("object")
+
     def __init__(self):
-        super().__init__(name="dstring")
+        super().__init__(name="udf_string")
         llty = default_manager[self].get_value_type()
         self.size_bytes = llty.get_abi_size(target_data)
 
+    @property
+    def return_type(self):
+        return self
+
 
 class StringView(types.Type):
+
+    np_dtype = np.dtype("object")
+
     def __init__(self):
         super().__init__(name="string_view")
         llty = default_manager[self].get_value_type()
         self.size_bytes = llty.get_abi_size(target_data)
 
+    @property
+    def return_type(self):
+        return UDFString()
+
 
 @register_model(StringView)
 class stringview_model(models.StructModel):
@@ -56,9 +71,9 @@ def __init__(self, dmm, fe_type):
         super().__init__(dmm, fe_type, self._members)
 
 
-@register_model(DString)
-class dstring_model(models.StructModel):
-    # from dstring.hpp:
+@register_model(UDFString)
+class udf_string_model(models.StructModel):
+    # from udf_string.hpp:
     # private:
     #   char* m_data{};
     #   cudf::size_type m_bytes{};
@@ -74,8 +89,9 @@ def __init__(self, dmm, fe_type):
         super().__init__(dmm, fe_type, self._members)
 
 
-any_string_ty = (StringView, DString, types.StringLiteral)
+any_string_ty = (StringView, UDFString, types.StringLiteral)
 string_view = StringView()
+udf_string = UDFString()
 
 
 class StrViewArgHandler:
@@ -93,7 +109,9 @@ class StrViewArgHandler:
     """
 
     def prepare_args(self, ty, val, **kwargs):
-        if isinstance(ty, types.CPointer) and isinstance(ty.dtype, StringView):
+        if isinstance(ty, types.CPointer) and isinstance(
+            ty.dtype, (StringView, UDFString)
+        ):
             return types.uint64, val.ptr
         else:
             return ty, val
@@ -113,7 +131,7 @@ def generic(self, args, kws):
         if isinstance(args[0], any_string_ty) and len(args) == 1:
             # length:
             # string_view -> int32
-            # dstring -> int32
+            # udf_string -> int32
             # literal -> int32
             return nb_signature(size_type, args[0])
 
diff --git a/python/strings_udf/strings_udf/lowering.py b/python/strings_udf/strings_udf/lowering.py
index df0902dfa98..909b0e56187 100644
--- a/python/strings_udf/strings_udf/lowering.py
+++ b/python/strings_udf/strings_udf/lowering.py
@@ -5,6 +5,7 @@
 
 from numba import cuda, types
 from numba.core import cgutils
+from numba.core.datamodel import default_manager
 from numba.core.typing import signature as nb_signature
 from numba.cuda.cudadrv import nvvm
 from numba.cuda.cudaimpl import (
@@ -13,21 +14,15 @@
 )
 
 from strings_udf._lib.tables import get_character_flags_table_ptr
-from strings_udf._typing import size_type, string_view
+from strings_udf._typing import size_type, string_view, udf_string
 
 character_flags_table_ptr = get_character_flags_table_ptr()
 
-
-# read-only functions
-# We will provide only one overload for this set of functions, which will
-# expect a string_view. When a literal is encountered, numba will promote it to
-# a string_view whereas when a dstring is encountered, numba will convert it to
-# a view via its native view() method.
-
 _STR_VIEW_PTR = types.CPointer(string_view)
 
 
 # CUDA function declarations
+# read-only (input is a string_view, output is a fixed with type)
 _string_view_len = cuda.declare_device("len", size_type(_STR_VIEW_PTR))
 
 
@@ -107,6 +102,35 @@ def cast_string_literal_to_string_view(context, builder, fromty, toty, val):
     return sv._getvalue()
 
 
+@cuda_lowering_registry.lower_cast(string_view, udf_string)
+def cast_string_view_to_udf_string(context, builder, fromty, toty, val):
+    sv_ptr = builder.alloca(default_manager[fromty].get_value_type())
+    udf_str_ptr = builder.alloca(default_manager[toty].get_value_type())
+    builder.store(val, sv_ptr)
+    _ = context.compile_internal(
+        builder,
+        call_create_udf_string_from_string_view,
+        nb_signature(types.void, _STR_VIEW_PTR, types.CPointer(udf_string)),
+        (sv_ptr, udf_str_ptr),
+    )
+    result = cgutils.create_struct_proxy(udf_string)(
+        context, builder, value=builder.load(udf_str_ptr)
+    )
+
+    return result._getvalue()
+
+
+# utilities
+_create_udf_string_from_string_view = cuda.declare_device(
+    "udf_string_from_string_view",
+    types.void(types.CPointer(string_view), types.CPointer(udf_string)),
+)
+
+
+def call_create_udf_string_from_string_view(sv, udf_str):
+    _create_udf_string_from_string_view(sv, udf_str)
+
+
 # String function implementations
 def call_len_string_view(st):
     return _string_view_len(st)
diff --git a/python/strings_udf/strings_udf/tests/test_string_udfs.py b/python/strings_udf/strings_udf/tests/test_string_udfs.py
index 1a5dfa00dd7..ca3fbda4eb1 100644
--- a/python/strings_udf/strings_udf/tests/test_string_udfs.py
+++ b/python/strings_udf/strings_udf/tests/test_string_udfs.py
@@ -9,14 +9,18 @@
 from numba.types import CPointer, void
 
 import cudf
+import rmm
 from cudf.testing._utils import assert_eq
 
 import strings_udf
-from strings_udf._lib.cudf_jit_udf import to_string_view_array
-from strings_udf._typing import str_view_arg_handler, string_view
+from strings_udf._lib.cudf_jit_udf import (
+    column_from_udf_string_array,
+    column_to_string_view_array,
+)
+from strings_udf._typing import str_view_arg_handler, string_view, udf_string
 
 
-def get_kernel(func, dtype):
+def get_kernel(func, dtype, size):
     """
     Create a kernel for testing a single scalar string function
     Allocates an output vector with a dtype specified by the caller
@@ -25,15 +29,19 @@ def get_kernel(func, dtype):
     """
 
     func = cuda.jit(device=True)(func)
-    outty = numba.np.numpy_support.from_dtype(dtype)
-    sig = nb_signature(void, CPointer(string_view), outty[::1])
+
+    if dtype == "str":
+        outty = CPointer(udf_string)
+    else:
+        outty = numba.np.numpy_support.from_dtype(dtype)[::1]
+    sig = nb_signature(void, CPointer(string_view), outty)
 
     @cuda.jit(
         sig, link=[strings_udf.ptxpath], extensions=[str_view_arg_handler]
     )
     def kernel(input_strings, output_col):
         id = cuda.grid(1)
-        if id < len(output_col):
+        if id < size:
             st = input_strings[id]
             result = func(st)
             output_col[id] = result
@@ -50,15 +58,22 @@ def run_udf_test(data, func, dtype):
     and then assembles the result back into a cuDF series before
     comparing it with the equivalent pandas result
     """
-    dtype = np.dtype(dtype)
+    if dtype == "str":
+        output = rmm.DeviceBuffer(size=len(data) * udf_string.size_bytes)
+    else:
+        dtype = np.dtype(dtype)
+        output = cudf.core.column.column_empty(len(data), dtype=dtype)
+
     cudf_column = cudf.core.column.as_column(data)
-    str_view_ary = to_string_view_array(cudf_column)
+    str_views = column_to_string_view_array(cudf_column)
 
-    output_ary = cudf.core.column.column_empty(len(data), dtype=dtype)
+    kernel = get_kernel(func, dtype, len(data))
+    kernel.forall(len(data))(str_views, output)
 
-    kernel = get_kernel(func, dtype)
-    kernel.forall(len(data))(str_view_ary, output_ary)
-    got = cudf.Series(output_ary, dtype=dtype)
+    if dtype == "str":
+        output = column_from_udf_string_array(output)
+
+    got = cudf.Series(output, dtype=dtype)
     expect = pd.Series(data).apply(func)
     assert_eq(expect, got, check_dtype=False)
 
@@ -256,3 +271,10 @@ def func(st):
         return st.startswith(substr)
 
     run_udf_test(data, func, "bool")
+
+
+def test_string_udf_return_string(data):
+    def func(st):
+        return st
+
+    run_udf_test(data, func, "str")

From c900fedc8dc7ce7b5ca220d50cb82ac965e643e7 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Tue, 8 Nov 2022 09:11:37 -0600
Subject: [PATCH 139/202] Bifurcate Dependency Lists [skip-gpuci] (#11674)

This PR uses the [`rapids-dependency-file-generator`](https://github.com/rapidsai/dependency-file-generator/) to handle sourcing dependencies. Similar to https://github.com/rapidsai/rmm/pull/1073/, this PR introduces a GitHub Action that enforces consistency between the new `dependencies.yaml` file and the generated conda environment for developers.

Authors:
   - Bradley Dice (https://github.com/bdice)

Approvers:
   - AJ Schmidt (https://github.com/ajschmidt8)
   - GALI PREM SAGAR (https://github.com/galipremsagar)
---
 .github/workflows/dependency-files.yml        |  12 +
 CONTRIBUTING.md                               |   7 +-
 ci/release/update-version.sh                  |   5 +-
 .../all_cuda-115_arch-x86_64.yaml             |  78 +++++++
 conda/environments/cudf_dev_cuda11.5.yml      |  86 --------
 dependencies.yaml                             | 207 ++++++++++++++++++
 6 files changed, 302 insertions(+), 93 deletions(-)
 create mode 100644 .github/workflows/dependency-files.yml
 create mode 100644 conda/environments/all_cuda-115_arch-x86_64.yaml
 delete mode 100644 conda/environments/cudf_dev_cuda11.5.yml
 create mode 100644 dependencies.yaml

diff --git a/.github/workflows/dependency-files.yml b/.github/workflows/dependency-files.yml
new file mode 100644
index 00000000000..2ae939292d7
--- /dev/null
+++ b/.github/workflows/dependency-files.yml
@@ -0,0 +1,12 @@
+name: pr
+
+on:
+  pull_request:
+
+jobs:
+  checks:
+    secrets: inherit
+    uses: rapidsai/shared-action-workflows/.github/workflows/checks.yaml@main
+    with:
+      enable_check_size: false
+      enable_check_style: false
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 9c432b6cd4c..608bd42d86c 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -99,13 +99,13 @@ cd $CUDF_HOME
 **Note:** Using a conda environment is the easiest way to satisfy the library's dependencies.
 Instructions for a minimal build environment without conda are included below.
 
-- Create the conda development environment `cudf_dev`:
+- Create the conda development environment:
 
 ```bash
 # create the conda environment (assuming in base `cudf` directory)
 # note: RAPIDS currently doesn't support `channel_priority: strict`;
 # use `channel_priority: flexible` instead
-conda env create --name cudf_dev --file conda/environments/cudf_dev_cuda11.5.yml
+conda env create --name cudf_dev --file conda/environments/all_cuda-115_arch-x86_64.yaml
 # activate the environment
 conda activate cudf_dev
 ```
@@ -114,9 +114,6 @@ conda activate cudf_dev
   development environment may also need to be updated if dependency versions or
   pinnings are changed.
 
-- For other CUDA versions, check the corresponding `cudf_dev_cuda*.yml` file in
-  `conda/environments/`.
-
 #### Building without a conda environment
 
 - libcudf has the following minimal dependencies (in addition to those listed in the [General
diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
index 52dc22b6c49..9dcfe093643 100755
--- a/ci/release/update-version.sh
+++ b/ci/release/update-version.sh
@@ -63,9 +63,10 @@ sed_runner 's/version = .*/version = '"'${NEXT_SHORT_TAG}'"'/g' docs/cudf/source
 sed_runner 's/release = .*/release = '"'${NEXT_FULL_TAG}'"'/g' docs/cudf/source/conf.py
 
 # bump rmm & dask-cuda
-for FILE in conda/environments/*.yml; do
-  sed_runner "s/rmm=${CURRENT_SHORT_TAG}/rmm=${NEXT_SHORT_TAG}/g" ${FILE};
+for FILE in conda/environments/*.yaml dependencies.yaml; do
   sed_runner "s/dask-cuda=${CURRENT_SHORT_TAG}/dask-cuda=${NEXT_SHORT_TAG}/g" ${FILE};
+  sed_runner "s/rmm=${CURRENT_SHORT_TAG}/rmm=${NEXT_SHORT_TAG}/g" ${FILE};
+  sed_runner "s/rmm-cu11=${CURRENT_SHORT_TAG}/rmm-cu11=${NEXT_SHORT_TAG}/g" ${FILE};
 done
 
 # Doxyfile update
diff --git a/conda/environments/all_cuda-115_arch-x86_64.yaml b/conda/environments/all_cuda-115_arch-x86_64.yaml
new file mode 100644
index 00000000000..a7e5f1a04a6
--- /dev/null
+++ b/conda/environments/all_cuda-115_arch-x86_64.yaml
@@ -0,0 +1,78 @@
+# This file is generated by `rapids-dependency-file-generator`.
+# To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
+channels:
+- rapidsai
+- rapidsai-nightly
+- dask/label/dev
+- conda-forge
+- nvidia
+dependencies:
+- aiobotocore>=2.2.0
+- arrow-cpp=9
+- boto3>=1.21.21
+- botocore>=1.24.21
+- c-compiler
+- cachetools
+- cmake>=3.23.1
+- cubinlinker
+- cuda-python>=11.7.1,<12.0
+- cudatoolkit=11.5
+- cupy>=9.5.0,<12.0.0a0
+- cxx-compiler
+- cython>=0.29,<0.30
+- dask-cuda=22.12.*
+- dask>=2022.9.2
+- distributed>=2022.9.2
+- dlpack>=0.5,<0.6.0a0
+- doxygen=1.8.20
+- fastavro>=0.22.9
+- fsspec>=0.6.0
+- gcc_linux-64=9.*
+- hypothesis
+- ipython
+- librdkafka=1.7.0
+- mimesis>=4.1.0
+- moto>=4.0.8
+- myst-nb
+- nbsphinx
+- notebook>=0.5.0
+- numba>=0.56.2
+- numpy
+- numpydoc
+- nvcc_linux-64=11.5
+- nvtx>=0.2.1
+- packaging
+- pandas>=1.0,<1.6.0dev0
+- pandoc<=2.0.0
+- pip
+- pre-commit
+- protobuf>=3.20.1,<3.21.0a0
+- ptxcompiler
+- pyarrow=9.0.0
+- pydata-sphinx-theme
+- pytest
+- pytest-benchmark
+- pytest-cases
+- pytest-cov
+- pytest-xdist
+- python-confluent-kafka=1.7.0
+- python-snappy>=0.6.0
+- python>=3.8,<3.10
+- pytorch<1.12.0
+- rmm=22.12.*
+- s3fs>=2022.3.0
+- scikit-build>=0.13.1
+- scipy
+- sphinx
+- sphinx-autobuild
+- sphinx-copybutton
+- sphinx-markdown-tables
+- sphinxcontrib-websupport
+- streamz
+- sysroot_linux-64==2.17
+- transformers
+- typing_extensions
+- pip:
+  - git+https://github.com/python-streamz/streamz.git@master
+  - pyorc
+name: all_cuda-115_arch-x86_64
diff --git a/conda/environments/cudf_dev_cuda11.5.yml b/conda/environments/cudf_dev_cuda11.5.yml
deleted file mode 100644
index 2cad2002456..00000000000
--- a/conda/environments/cudf_dev_cuda11.5.yml
+++ /dev/null
@@ -1,86 +0,0 @@
-# Copyright (c) 2021-2022, NVIDIA CORPORATION.
-
-name: cudf_dev
-channels:
-  - rapidsai
-  - rapidsai-nightly
-  - dask/label/dev
-  - conda-forge
-  - nvidia
-dependencies:
-  - c-compiler
-  - cxx-compiler
-  - clang=11.1.0
-  - clang-tools=11.1.0
-  - cupy>=9.5.0,<12.0.0a0
-  - rmm=22.12.*
-  - cmake>=3.23.1
-  - cmake_setuptools>=0.1.3
-  - scikit-build>=0.13.1
-  - python>=3.8,<3.10
-  - numba>=0.56.2
-  - numpy
-  - pandas>=1.0,<1.6.0dev0
-  - pyarrow=9
-  - fastavro>=0.22.9
-  - python-snappy>=0.6.0
-  - notebook>=0.5.0
-  - cython>=0.29,<0.30
-  - fsspec>=0.6.0
-  - pytest
-  - pytest-benchmark
-  - pytest-cases
-  - pytest-xdist
-  - sphinx
-  - sphinxcontrib-websupport
-  - nbsphinx
-  - numpydoc
-  - ipython
-  - pandoc<=2.0.0
-  - cudatoolkit=11.5
-  - cuda-python>=11.7.1,<12.0
-  - pip
-  - doxygen=1.8.20
-  - typing_extensions
-  - pre-commit
-  - dask>=2022.9.2
-  - distributed>=2022.9.2
-  - streamz
-  - arrow-cpp=9
-  - dlpack>=0.5,<0.6.0a0
-  - double-conversion
-  - rapidjson
-  - hypothesis
-  - sphinx-markdown-tables
-  - sphinx-copybutton
-  - sphinx-autobuild
-  - myst-nb
-  - scipy
-  - dask-cuda=22.12.*
-  - mimesis>=4.1.0
-  - packaging
-  - protobuf>=3.20.1,<3.21.0a0
-  - nvtx>=0.2.1
-  - cachetools
-  - transformers<=4.10.3
-  - pydata-sphinx-theme
-  - pyorc
-  - librdkafka=1.7.0
-  - python-confluent-kafka=1.7.0
-  - moto>=3.1.6
-  - boto3>=1.21.21
-  - botocore>=1.24.21
-  - aiobotocore>=2.2.0
-  - s3fs>=2022.3.0
-  - werkzeug<2.2.0 # Temporary transient dependency pinning to avoid URL-LIB3 + moto timeouts
-  - pytorch<1.12.0
-  - cubinlinker  # [linux64]
-  - gcc_linux-64=9.* # [linux64]
-  - sysroot_linux-64==2.17 # [linux64]
-  - nvcc_linux-64=11.5
-  # Un-comment following lines for ARM specific packages.
-  # - gcc_linux-aarch64=9.* # [aarch64]
-  # - sysroot_linux-aarch64==2.17 # [aarch64]
-  # - nvcc_linux-aarch64=11.5 # [aarch64]
-  - pip:
-      - git+https://github.com/python-streamz/streamz.git@master
diff --git a/dependencies.yaml b/dependencies.yaml
new file mode 100644
index 00000000000..b8470f02f86
--- /dev/null
+++ b/dependencies.yaml
@@ -0,0 +1,207 @@
+# Dependency list for https://github.com/rapidsai/dependency-file-generator
+files:
+  all:
+    output: conda
+    matrix:
+      cuda: ["11.5"]
+      arch: [x86_64]
+    includes:
+      - build
+      - cudatoolkit
+      - develop
+      - doc
+      - notebook
+      - run
+      - test_python
+  test_cpp:
+    output: none
+    includes:
+      - cudatoolkit
+  test_python:
+    output: none
+    includes:
+      - cudatoolkit
+      - py_version
+      - test_python
+  checks:
+    output: none
+    includes:
+      - build
+      - develop
+      - py_version
+channels:
+  - rapidsai
+  - rapidsai-nightly
+  - dask/label/dev
+  - conda-forge
+  - nvidia
+dependencies:
+  build:
+    common:
+      - output_types: [conda, requirements]
+        packages:
+          - cmake>=3.23.1
+          - cuda-python>=11.7.1,<12.0
+          - cython>=0.29,<0.30
+          - dlpack>=0.5,<0.6.0a0
+          - pyarrow=9.0.0
+          - rmm=22.12.*
+          - scikit-build>=0.13.1
+      - output_types: conda
+        packages:
+          - arrow-cpp=9
+          - c-compiler
+          - cxx-compiler
+          - librdkafka=1.7.0
+          - protobuf>=3.20.1,<3.21.0a0
+          - python>=3.8,<3.10
+    specific:
+      - output_types: conda
+        matrices:
+        - matrix:
+            arch: x86_64
+          packages:
+            - gcc_linux-64=9.*
+            - sysroot_linux-64==2.17
+        - matrix:
+            arch: aarch64
+          packages:
+            - gcc_linux-aarch64=9.*
+            - sysroot_linux-aarch64==2.17
+      - output_types: conda
+        matrices:
+        - matrix:
+            arch: x86_64
+            cuda: "11.5"
+          packages:
+            - nvcc_linux-64=11.5
+        - matrix:
+            arch: aarch64
+            cuda: "11.5"
+          packages:
+            - nvcc_linux-aarch64=11.5
+  cudatoolkit:
+    specific:
+      - output_types: conda
+        matrices:
+          - matrix:
+              cuda: "11.2"
+            packages:
+              - cudatoolkit=11.2
+          - matrix:
+              cuda: "11.4"
+            packages:
+              - cudatoolkit=11.4
+          - matrix:
+              cuda: "11.5"
+            packages:
+              - cudatoolkit=11.5
+  develop:
+    common:
+      - output_types: [conda, requirements]
+        packages:
+          - pre-commit
+      - output_types: conda
+        packages:
+          - doxygen=1.8.20  # pre-commit hook needs a specific version.
+  doc:
+    common:
+      - output_types: [conda, requirements]
+        packages:
+          - myst-nb
+          - nbsphinx
+          - numpydoc
+          - pandoc<=2.0.0  # We should check and fix all "<=" pinnings
+          - pydata-sphinx-theme
+          - sphinx
+          - sphinx-autobuild
+          - sphinx-copybutton
+          - sphinx-markdown-tables
+          - sphinxcontrib-websupport
+  notebook:
+    common:
+      - output_types: [conda, requirements]
+        packages:
+          - ipython
+          - notebook>=0.5.0
+  py_version:
+    specific:
+      - output_types: conda
+        matrices:
+          - matrix:
+              py: "3.8"
+            packages:
+              - python=3.8
+          - matrix:
+              py: "3.9"
+            packages:
+              - python=3.9
+  run:
+    common:
+      - output_types: [conda, requirements]
+        packages:
+          - cachetools
+          - dask>=2022.9.2
+          - distributed>=2022.9.2
+          - fsspec>=0.6.0
+          - numba>=0.56.2
+          - numpy
+          - nvtx>=0.2.1
+          - packaging
+          - pandas>=1.0,<1.6.0dev0
+          - python-confluent-kafka=1.7.0
+          - streamz
+          - typing_extensions
+      - output_types: conda
+        packages:
+          - cubinlinker
+          - cupy>=9.5.0,<12.0.0a0
+          - pip
+          - pip:
+              - git+https://github.com/python-streamz/streamz.git@master
+              - pyorc
+          - ptxcompiler
+          - rmm=22.12.*
+      - output_types: requirements
+        packages:
+          # pip recognizes the index as a global option for the requirements.txt file
+          # This index is needed for rmm, cubinlinker, ptxcompiler.
+          - --extra-index-url=https://pypi.ngc.nvidia.com
+          - cubinlinker-cu11
+          - git+https://github.com/python-streamz/streamz.git@master
+          - ptxcompiler-cu11
+          - pyorc
+          - rmm-cu11=22.12.*
+    specific:
+      - output_types: requirements
+        matrices:
+          - matrix:
+              arch: x86_64
+            packages:
+              - cupy-cuda115>=9.5.0,<12.0.0a0  # TODO: This might change to cupy-cuda11x?
+          - matrix:
+              arch: aarch64
+            packages:
+              - cupy-cuda11x -f https://pip.cupy.dev/aarch64  # TODO: Verify that this works.
+  test_python:
+    common:
+      - output_types: [conda, requirements]
+        packages:
+          - aiobotocore>=2.2.0
+          - boto3>=1.21.21
+          - botocore>=1.24.21
+          - dask-cuda=22.12.*
+          - fastavro>=0.22.9
+          - hypothesis
+          - mimesis>=4.1.0
+          - moto>=4.0.8
+          - pytest
+          - pytest-benchmark
+          - pytest-cases
+          - pytest-cov
+          - pytest-xdist
+          - python-snappy>=0.6.0
+          - pytorch<1.12.0  # We should check and fix all "<=" pinnings
+          - s3fs>=2022.3.0
+          - scipy
+          - transformers

From 8ee5f51f971c4994694f34bb52524540f807f7fc Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Tue, 8 Nov 2022 14:29:35 -0800
Subject: [PATCH 140/202] Enable building against the libarrow contained in
 pyarrow (#12034)

This feature is a prerequisite for wheels. The only real reason to enable it is to provide interop with a pyarrow wheel, so this option is marked as advanced. In the process of implementing this feature, I have also done some cleanup of `get_arrow.cmake` to try to simplify its logic.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Paul Taylor (https://github.com/trxcllnt)
  - Robert Maynard (https://github.com/robertmaynard)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/12034
---
 cpp/CMakeLists.txt                   |   2 +
 cpp/cmake/thirdparty/get_arrow.cmake | 188 ++++++++++++++++++---------
 python/cudf/CMakeLists.txt           |  16 +++
 3 files changed, 141 insertions(+), 65 deletions(-)

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 75de15bdf22..e13b1747a7e 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -69,6 +69,8 @@ option(CUDA_ENABLE_LINEINFO
 option(CUDA_WARNINGS_AS_ERRORS "Enable -Werror=all-warnings for all CUDA compilation" ON)
 # cudart can be statically linked or dynamically linked. The python ecosystem wants dynamic linking
 option(CUDA_STATIC_RUNTIME "Statically link the CUDA runtime" OFF)
+option(USE_LIBARROW_FROM_PYARROW "Use the libarrow contained within pyarrow." OFF)
+mark_as_advanced(USE_LIBARROW_FROM_PYARROW)
 
 message(VERBOSE "CUDF: Build with NVTX support: ${USE_NVTX}")
 message(VERBOSE "CUDF: Configure CMake to build tests: ${BUILD_TESTS}")
diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake
index 9fa5b9d1658..94dcdcb5bc2 100644
--- a/cpp/cmake/thirdparty/get_arrow.cmake
+++ b/cpp/cmake/thirdparty/get_arrow.cmake
@@ -20,43 +20,98 @@
 
 # cmake-lint: disable=R0912,R0913,R0915
 
+include_guard(GLOBAL)
+
+# Generate a FindArrow module for the case where we need to search for arrow within a pip install
+# pyarrow.
+function(find_libarrow_in_python_wheel PYARROW_VERSION)
+  string(REPLACE "." "" PYARROW_SO_VER "${PYARROW_VERSION}")
+  set(PYARROW_LIB libarrow.so.${PYARROW_SO_VER})
+
+  find_package(Python REQUIRED)
+  execute_process(
+    COMMAND "${Python_EXECUTABLE}" -c "import pyarrow; print(pyarrow.get_library_dirs()[0])"
+    OUTPUT_VARIABLE CUDF_PYARROW_WHEEL_DIR
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+  )
+  list(APPEND CMAKE_PREFIX_PATH "${CUDF_PYARROW_WHEEL_DIR}")
+  rapids_find_generate_module(
+    Arrow NO_CONFIG
+    VERSION "${PYARROW_VERSION}"
+    LIBRARY_NAMES "${PYARROW_LIB}"
+    BUILD_EXPORT_SET cudf-exports
+    INSTALL_EXPORT_SET cudf-exports
+    HEADER_NAMES arrow/python/arrow_to_pandas.h
+  )
+
+  find_package(Arrow ${PYARROW_VERSION} MODULE REQUIRED GLOBAL)
+  add_library(arrow_shared ALIAS Arrow::Arrow)
+
+  # When using the libarrow inside a wheel we must build libcudf with the old ABI because pyarrow's
+  # `libarrow.so` is compiled for manylinux2014 (centos7 toolchain) which uses the old ABI. Note
+  # that these flags will often be redundant because we build wheels in manylinux containers that
+  # actually have the old libc++ anyway, but setting them explicitly ensures correct and consistent
+  # behavior in all other cases such as aarch builds on newer manylinux or testing builds in newer
+  # containers. Note that tests will not build successfully without also propagating these options
+  # to builds of GTest. Similarly, benchmarks will not work without updating GBench (and possibly
+  # NVBench) builds. We are currently ignoring these limitations since we don't anticipate using
+  # this feature except for building wheels.
+  target_compile_options(
+    Arrow::Arrow INTERFACE "$<$<COMPILE_LANGUAGE:CXX>:-D_GLIBCXX_USE_CXX11_ABI=0>"
+                           "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler=-D_GLIBCXX_USE_CXX11_ABI=0>"
+  )
+
+  rapids_export_package(BUILD Arrow cudf-exports)
+  rapids_export_package(INSTALL Arrow cudf-exports)
+
+  list(POP_BACK CMAKE_PREFIX_PATH)
+endfunction()
+
 # This function finds arrow and sets any additional necessary environment variables.
 function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENABLE_PYTHON
          ENABLE_PARQUET
 )
 
+  if(USE_LIBARROW_FROM_PYARROW)
+    # Generate a FindArrow.cmake to find pyarrow's libarrow.so
+    find_libarrow_in_python_wheel(${VERSION})
+    set(ARROW_FOUND
+        TRUE
+        PARENT_SCOPE
+    )
+    set(ARROW_LIBRARIES
+        arrow_shared
+        PARENT_SCOPE
+    )
+    return()
+  endif()
+
   if(BUILD_STATIC)
     if(TARGET arrow_static)
-      list(APPEND ARROW_LIBRARIES arrow_static)
       set(ARROW_FOUND
           TRUE
           PARENT_SCOPE
       )
       set(ARROW_LIBRARIES
-          ${ARROW_LIBRARIES}
+          arrow_static
           PARENT_SCOPE
       )
       return()
     endif()
   else()
     if(TARGET arrow_shared)
-      list(APPEND ARROW_LIBRARIES arrow_shared)
       set(ARROW_FOUND
           TRUE
           PARENT_SCOPE
       )
       set(ARROW_LIBRARIES
-          ${ARROW_LIBRARIES}
+          arrow_shared
           PARENT_SCOPE
       )
       return()
     endif()
   endif()
 
-  set(ARROW_BUILD_SHARED ON)
-  set(ARROW_BUILD_STATIC OFF)
-  set(CPMAddOrFindPackage CPMFindPackage)
-
   if(NOT ARROW_ARMV8_ARCH)
     set(ARROW_ARMV8_ARCH "armv8-a")
   endif()
@@ -69,8 +124,11 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB
     set(ARROW_BUILD_STATIC ON)
     set(ARROW_BUILD_SHARED OFF)
     # Turn off CPM using `find_package` so we always download and make sure we get proper static
-    # library
-    set(CPM_DOWNLOAD_ALL TRUE)
+    # library.
+    set(CPM_DOWNLOAD_Arrow TRUE)
+  else()
+    set(ARROW_BUILD_SHARED ON)
+    set(ARROW_BUILD_STATIC OFF)
   endif()
 
   set(ARROW_PYTHON_OPTIONS "")
@@ -91,7 +149,8 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB
 
   rapids_cpm_find(
     Arrow ${VERSION}
-    GLOBAL_TARGETS arrow_shared parquet_shared arrow_dataset_shared
+    GLOBAL_TARGETS arrow_shared parquet_shared arrow_dataset_shared arrow_static parquet_static
+                   arrow_dataset_static
     CPM_ARGS
     GIT_REPOSITORY https://github.com/apache/arrow.git
     GIT_TAG apache-arrow-${VERSION}
@@ -125,61 +184,65 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB
             "xsimd_SOURCE AUTO"
   )
 
-  set(ARROW_FOUND TRUE)
-  set(ARROW_LIBRARIES "")
+  set(ARROW_FOUND
+      TRUE
+      PARENT_SCOPE
+  )
 
-  # Arrow_ADDED: set if CPM downloaded Arrow from Github Arrow_DIR:   set if CPM found Arrow on the
-  # system/conda/etc.
-  if(Arrow_ADDED OR Arrow_DIR)
-    if(BUILD_STATIC)
-      list(APPEND ARROW_LIBRARIES arrow_static)
-    else()
-      list(APPEND ARROW_LIBRARIES arrow_shared)
-    endif()
+  if(BUILD_STATIC)
+    set(ARROW_LIBRARIES arrow_static)
+  else()
+    set(ARROW_LIBRARIES arrow_shared)
+  endif()
 
-    if(Arrow_DIR)
-      find_package(Arrow REQUIRED QUIET)
-      if(ENABLE_PARQUET)
-        if(NOT Parquet_DIR)
-          # Set this to enable `find_package(Parquet)`
-          set(Parquet_DIR "${Arrow_DIR}")
-        endif()
-        # Set this to enable `find_package(ArrowDataset)`
-        set(ArrowDataset_DIR "${Arrow_DIR}")
-        find_package(ArrowDataset REQUIRED QUIET)
+  # Arrow_DIR:   set if CPM found Arrow on the system/conda/etc.
+  if(Arrow_DIR)
+    # This extra find_package is necessary because rapids_cpm_find does not propagate all the
+    # variables from find_package that we might need. This is especially problematic when
+    # rapids_cpm_find builds from source.
+    find_package(Arrow REQUIRED QUIET)
+    if(ENABLE_PARQUET)
+      # Setting Parquet_DIR is conditional because parquet may be installed independently of arrow.
+      if(NOT Parquet_DIR)
+        # Set this to enable `find_package(Parquet)`
+        set(Parquet_DIR "${Arrow_DIR}")
       endif()
-    elseif(Arrow_ADDED)
-      # Copy these files so we can avoid adding paths in Arrow_BINARY_DIR to
-      # target_include_directories. That defeats ccache.
-      file(INSTALL "${Arrow_BINARY_DIR}/src/arrow/util/config.h"
-           DESTINATION "${Arrow_SOURCE_DIR}/cpp/src/arrow/util"
+      # Set this to enable `find_package(ArrowDataset)`
+      set(ArrowDataset_DIR "${Arrow_DIR}")
+      find_package(ArrowDataset REQUIRED QUIET)
+    endif()
+    # Arrow_ADDED: set if CPM downloaded Arrow from Github
+  elseif(Arrow_ADDED)
+    # Copy these files so we can avoid adding paths in Arrow_BINARY_DIR to
+    # target_include_directories. That defeats ccache.
+    file(INSTALL "${Arrow_BINARY_DIR}/src/arrow/util/config.h"
+         DESTINATION "${Arrow_SOURCE_DIR}/cpp/src/arrow/util"
+    )
+    if(ENABLE_PARQUET)
+      file(INSTALL "${Arrow_BINARY_DIR}/src/parquet/parquet_version.h"
+           DESTINATION "${Arrow_SOURCE_DIR}/cpp/src/parquet"
       )
-      if(ENABLE_PARQUET)
-        file(INSTALL "${Arrow_BINARY_DIR}/src/parquet/parquet_version.h"
-             DESTINATION "${Arrow_SOURCE_DIR}/cpp/src/parquet"
-        )
-      endif()
-      #
-      # This shouldn't be necessary!
-      #
-      # Arrow populates INTERFACE_INCLUDE_DIRECTORIES for the `arrow_static` and `arrow_shared`
-      # targets in FindArrow, so for static source-builds, we have to do it after-the-fact.
-      #
-      # This only works because we know exactly which components we're using. Don't forget to update
-      # this list if we add more!
-      #
-      foreach(ARROW_LIBRARY ${ARROW_LIBRARIES})
-        target_include_directories(
-          ${ARROW_LIBRARY}
-          INTERFACE "$<BUILD_INTERFACE:${Arrow_SOURCE_DIR}/cpp/src>"
-                    "$<BUILD_INTERFACE:${Arrow_SOURCE_DIR}/cpp/src/generated>"
-                    "$<BUILD_INTERFACE:${Arrow_SOURCE_DIR}/cpp/thirdparty/hadoop/include>"
-                    "$<BUILD_INTERFACE:${Arrow_SOURCE_DIR}/cpp/thirdparty/flatbuffers/include>"
-        )
-      endforeach()
     endif()
+    # Arrow populates INTERFACE_INCLUDE_DIRECTORIES for the `arrow_static` and `arrow_shared`
+    # targets in FindArrow, so for static source-builds, we have to do it after-the-fact.
+    #
+    # This only works because we know exactly which components we're using. Don't forget to update
+    # this list if we add more!
+    #
+    foreach(ARROW_LIBRARY ${ARROW_LIBRARIES})
+      target_include_directories(
+        ${ARROW_LIBRARY}
+        INTERFACE "$<BUILD_INTERFACE:${Arrow_SOURCE_DIR}/cpp/src>"
+                  "$<BUILD_INTERFACE:${Arrow_SOURCE_DIR}/cpp/src/generated>"
+                  "$<BUILD_INTERFACE:${Arrow_SOURCE_DIR}/cpp/thirdparty/hadoop/include>"
+                  "$<BUILD_INTERFACE:${Arrow_SOURCE_DIR}/cpp/thirdparty/flatbuffers/include>"
+      )
+    endforeach()
   else()
-    set(ARROW_FOUND FALSE)
+    set(ARROW_FOUND
+        FALSE
+        PARENT_SCOPE
+    )
     message(FATAL_ERROR "CUDF: Arrow library not found or downloaded.")
   endif()
 
@@ -294,15 +357,10 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB
     rapids_export_find_package_root(BUILD ArrowDataset [=[${CMAKE_CURRENT_LIST_DIR}]=] cudf-exports)
   endif()
 
-  set(ARROW_FOUND
-      "${ARROW_FOUND}"
-      PARENT_SCOPE
-  )
   set(ARROW_LIBRARIES
       "${ARROW_LIBRARIES}"
       PARENT_SCOPE
   )
-
 endfunction()
 
 if(NOT DEFINED CUDF_VERSION_Arrow)
diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt
index f8eb3af86d7..8a3224237b6 100644
--- a/python/cudf/CMakeLists.txt
+++ b/python/cudf/CMakeLists.txt
@@ -31,9 +31,25 @@ project(
 option(FIND_CUDF_CPP "Search for existing CUDF C++ installations before defaulting to local files"
        OFF
 )
+option(USE_LIBARROW_FROM_PYARROW "Use the libarrow contained within pyarrow." OFF)
+mark_as_advanced(USE_LIBARROW_FROM_PYARROW)
 
 # If the user requested it we attempt to find CUDF.
 if(FIND_CUDF_CPP)
+  if(USE_LIBARROW_FROM_PYARROW)
+    # We need to find arrow before libcudf since libcudf requires it but doesn't bundle it. TODO:
+    # These options should probably all become optional since in practice they aren't meaningful
+    # except in the case where we actually compile Arrow.
+    set(CUDF_USE_ARROW_STATIC OFF)
+    set(CUDF_ENABLE_ARROW_S3 OFF)
+    set(CUDF_ENABLE_ARROW_ORC OFF)
+    set(CUDF_ENABLE_ARROW_PYTHON OFF)
+    set(CUDF_ENABLE_ARROW_PARQUET OFF)
+    include(rapids-find)
+    include(rapids-export)
+    include(../../cpp/cmake/thirdparty/get_arrow.cmake)
+  endif()
+
   find_package(cudf ${cudf_version} REQUIRED)
 else()
   set(cudf_FOUND OFF)

From 7535f31cfaf7e01578c413bb3ba46b03d2014806 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Tue, 8 Nov 2022 16:58:45 -0600
Subject: [PATCH 141/202] Remove CUDA 10 compatibility code. (#12088)

This PR updates some documentation and removes some compatibility layers referencing CUDA 10, which is no longer supported by the package.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - David Wendt (https://github.com/davidwendt)
  - Lawrence Mitchell (https://github.com/wence-)

URL: https://github.com/rapidsai/cudf/pull/12088
---
 cpp/src/copying/concatenate.cu         |  6 ++----
 cpp/src/rolling/detail/rolling.cuh     | 20 +++++++-------------
 cpp/src/rolling/jit/kernel.cu          |  8 ++------
 cpp/src/strings/copying/concatenate.cu | 12 ++++--------
 python/custreamz/README.md             |  4 ++--
 5 files changed, 17 insertions(+), 33 deletions(-)

diff --git a/cpp/src/copying/concatenate.cu b/cpp/src/copying/concatenate.cu
index 802b47e4664..577d6427b19 100644
--- a/cpp/src/copying/concatenate.cu
+++ b/cpp/src/copying/concatenate.cu
@@ -180,10 +180,8 @@ __global__ void fused_concatenate_kernel(column_device_view const* input_views,
   if (Nullable) { active_mask = __ballot_sync(0xFFFF'FFFFu, output_index < output_size); }
   while (output_index < output_size) {
     // Lookup input index by searching for output index in offsets
-    // thrust::prev isn't in CUDA 10.0, so subtracting 1 here instead
-    auto const offset_it =
-      -1 + thrust::upper_bound(
-             thrust::seq, input_offsets, input_offsets + num_input_views, output_index);
+    auto const offset_it            = thrust::prev(thrust::upper_bound(
+      thrust::seq, input_offsets, input_offsets + num_input_views, output_index));
     size_type const partition_index = offset_it - input_offsets;
 
     // Copy input data to output
diff --git a/cpp/src/rolling/detail/rolling.cuh b/cpp/src/rolling/detail/rolling.cuh
index 4394557e453..68480dbf773 100644
--- a/cpp/src/rolling/detail/rolling.cuh
+++ b/cpp/src/rolling/detail/rolling.cuh
@@ -122,10 +122,8 @@ struct DeviceRolling {
     using AggOp = typename corresponding_operator<op>::type;
     AggOp agg_op;
 
-    // declare this as volatile to avoid some compiler optimizations that lead to incorrect results
-    // for CUDA 10.0 and below (fixed in CUDA 10.1)
-    volatile cudf::size_type count = 0;
-    OutputType val                 = AggOp::template identity<OutputType>();
+    cudf::size_type count = 0;
+    OutputType val        = AggOp::template identity<OutputType>();
 
     for (size_type j = start_index; j < end_index; j++) {
       if (!has_nulls || input.is_valid(j)) {
@@ -190,11 +188,9 @@ struct DeviceRollingArgMinMaxString : DeviceRollingArgMinMaxBase<cudf::string_vi
     using AggOp     = typename corresponding_operator<op>::type;
     AggOp agg_op;
 
-    // declare this as volatile to avoid some compiler optimizations that lead to incorrect results
-    // for CUDA 10.0 and below (fixed in CUDA 10.1)
-    volatile cudf::size_type count = 0;
-    InputType val                  = AggOp::template identity<InputType>();
-    OutputType val_index           = default_output;
+    cudf::size_type count = 0;
+    InputType val         = AggOp::template identity<InputType>();
+    OutputType val_index  = default_output;
 
     for (size_type j = start_index; j < end_index; j++) {
       if (!has_nulls || input.is_valid(j)) {
@@ -284,13 +280,11 @@ struct DeviceRollingCountValid {
                              size_type end_index,
                              size_type current_index)
   {
-    // declare this as volatile to avoid some compiler optimizations that lead to incorrect
-    // results for CUDA 10.0 and below (fixed in CUDA 10.1)
-    volatile cudf::size_type count = 0;
-
     bool output_is_valid = ((end_index - start_index) >= min_periods);
 
     if (output_is_valid) {
+      cudf::size_type count = 0;
+
       if (!has_nulls) {
         count = end_index - start_index;
       } else {
diff --git a/cpp/src/rolling/jit/kernel.cu b/cpp/src/rolling/jit/kernel.cu
index ecdbbb6a0f2..3bfee32d1cc 100644
--- a/cpp/src/rolling/jit/kernel.cu
+++ b/cpp/src/rolling/jit/kernel.cu
@@ -58,10 +58,6 @@ __global__ void gpu_rolling_new(cudf::size_type nrows,
 
   auto active_threads = __ballot_sync(0xffff'ffffu, i < nrows);
   while (i < nrows) {
-    // declare this as volatile to avoid some compiler optimizations that lead to incorrect results
-    // for CUDA 10.0 and below (fixed in CUDA 10.1)
-    volatile cudf::size_type count = 0;
-
     int64_t const preceding_window = get_window(preceding_window_begin, i);
     int64_t const following_window = get_window(following_window_begin, i);
 
@@ -77,8 +73,8 @@ __global__ void gpu_rolling_new(cudf::size_type nrows,
     // TODO: We should explore using shared memory to avoid redundant loads.
     //       This might require separating the kernel into a special version
     //       for dynamic and static sizes.
-    count       = end_index - start_index;
-    OutType val = agg_op::template operate<OutType, InType>(in_col, start_index, count);
+    cudf::size_type count = end_index - start_index;
+    OutType val           = agg_op::template operate<OutType, InType>(in_col, start_index, count);
 
     // check if we have enough input samples
     bool const output_is_valid = (count >= min_periods);
diff --git a/cpp/src/strings/copying/concatenate.cu b/cpp/src/strings/copying/concatenate.cu
index 627e689d4d9..e44c343e31b 100644
--- a/cpp/src/strings/copying/concatenate.cu
+++ b/cpp/src/strings/copying/concatenate.cu
@@ -127,10 +127,8 @@ __global__ void fused_concatenate_string_offset_kernel(column_device_view const*
   if (Nullable) { active_mask = __ballot_sync(0xFFFF'FFFFu, output_index < output_size); }
   while (output_index < output_size) {
     // Lookup input index by searching for output index in offsets
-    // thrust::prev isn't in CUDA 10.0, so subtracting 1 here instead
-    auto const offset_it =
-      -1 + thrust::upper_bound(
-             thrust::seq, input_offsets, input_offsets + num_input_views, output_index);
+    auto const offset_it            = thrust::prev(thrust::upper_bound(
+      thrust::seq, input_offsets, input_offsets + num_input_views, output_index));
     size_type const partition_index = offset_it - input_offsets;
 
     auto const offset_index      = output_index - *offset_it;
@@ -180,10 +178,8 @@ __global__ void fused_concatenate_string_chars_kernel(column_device_view const*
 
   while (output_index < output_size) {
     // Lookup input index by searching for output index in offsets
-    // thrust::prev isn't in CUDA 10.0, so subtracting 1 here instead
-    auto const offset_it =
-      -1 + thrust::upper_bound(
-             thrust::seq, partition_offsets, partition_offsets + num_input_views, output_index);
+    auto const offset_it            = thrust::prev(thrust::upper_bound(
+      thrust::seq, partition_offsets, partition_offsets + num_input_views, output_index));
     size_type const partition_index = offset_it - partition_offsets;
 
     auto const offset_index = output_index - *offset_it;
diff --git a/python/custreamz/README.md b/python/custreamz/README.md
index 99ada746ec8..a1d98425d66 100644
--- a/python/custreamz/README.md
+++ b/python/custreamz/README.md
@@ -48,8 +48,8 @@ Please see the [Demo Docker Repository](https://hub.docker.com/r/rapidsai/rapids
 
 ### CUDA/GPU requirements
 
-* CUDA 10.0+
-* NVIDIA driver 410.48+
+* CUDA 11.0+
+* NVIDIA driver 450.80.02+
 * Pascal architecture or better (Compute Capability >=6.0)
 
 ### Conda

From 628cd4f387fec2ec49025991182192bb125657ae Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Wed, 9 Nov 2022 08:40:55 -0500
Subject: [PATCH 142/202] Change cudf::detail::tdigest to cudf::tdigest::detail
 (#12050)

Changes `cudf::detail::tdigest` to `cudf::tdigest::detail` in the tdigest source files.
While working on #12049, I found a mixture of `cudf::tdigest` and `cudf::detail::tdigest` that seemed confusing and inconsistent. Changing to `cudf::tdigest::detail` made this code easier to follow.
This PR also moves the `size_begin()` member function of `tdigest_column_view` into a standalone function in a separate `.cuh` header (`tdigest_util.cuh`), since it is only used in a few places whereas `tdigest_column_view.cuh` is included in many. This allowed renaming `tdigest_column_view.cuh` to a `.hpp` file.

Depends on #12049

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Nghia Truong (https://github.com/ttnghia)
  - Ray Douglass (https://github.com/raydouglass)

URL: https://github.com/rapidsai/cudf/pull/12050
---
 conda/recipes/libcudf/meta.yaml               |  2 +-
 cpp/include/cudf/detail/quantiles.hpp         |  2 +-
 cpp/include/cudf/detail/tdigest/tdigest.hpp   |  5 +-
 cpp/include/cudf/quantiles.hpp                |  2 +-
 ...olumn_view.cuh => tdigest_column_view.hpp} | 29 ----------
 cpp/include/cudf_test/tdigest_utilities.cuh   | 12 ++--
 cpp/src/groupby/sort/aggregate.cpp            |  4 +-
 cpp/src/quantiles/tdigest/tdigest.cu          | 32 +++++------
 .../quantiles/tdigest/tdigest_aggregation.cu  | 37 ++++++------
 .../quantiles/tdigest/tdigest_column_view.cpp |  2 +-
 cpp/src/quantiles/tdigest/tdigest_util.cuh    | 56 +++++++++++++++++++
 cpp/src/reductions/reductions.cpp             |  6 +-
 cpp/tests/groupby/tdigest_tests.cu            |  8 +--
 .../quantiles/percentile_approx_test.cpp      |  4 +-
 cpp/tests/utilities/tdigest_utilities.cu      |  2 +-
 java/src/main/native/src/ColumnViewJni.cpp    |  2 +-
 16 files changed, 116 insertions(+), 89 deletions(-)
 rename cpp/include/cudf/tdigest/{tdigest_column_view.cuh => tdigest_column_view.hpp} (82%)
 create mode 100644 cpp/src/quantiles/tdigest/tdigest_util.cuh

diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml
index 1d0153c94be..e4697206d4d 100644
--- a/conda/recipes/libcudf/meta.yaml
+++ b/conda/recipes/libcudf/meta.yaml
@@ -254,7 +254,7 @@ outputs:
         - test -f $PREFIX/include/cudf/structs/structs_column_view.hpp
         - test -f $PREFIX/include/cudf/table/table.hpp
         - test -f $PREFIX/include/cudf/table/table_view.hpp
-        - test -f $PREFIX/include/cudf/tdigest/tdigest_column_view.cuh
+        - test -f $PREFIX/include/cudf/tdigest/tdigest_column_view.hpp
         - test -f $PREFIX/include/cudf/transform.hpp
         - test -f $PREFIX/include/cudf/transpose.hpp
         - test -f $PREFIX/include/cudf/types.hpp
diff --git a/cpp/include/cudf/detail/quantiles.hpp b/cpp/include/cudf/detail/quantiles.hpp
index 752f8ef6367..3764b03641e 100644
--- a/cpp/include/cudf/detail/quantiles.hpp
+++ b/cpp/include/cudf/detail/quantiles.hpp
@@ -16,7 +16,7 @@
 #pragma once
 
 #include <cudf/quantiles.hpp>
-#include <cudf/tdigest/tdigest_column_view.cuh>
+#include <cudf/tdigest/tdigest_column_view.hpp>
 #include <cudf/utilities/default_stream.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
diff --git a/cpp/include/cudf/detail/tdigest/tdigest.hpp b/cpp/include/cudf/detail/tdigest/tdigest.hpp
index 77f9978ff1b..9df3f9daf3f 100644
--- a/cpp/include/cudf/detail/tdigest/tdigest.hpp
+++ b/cpp/include/cudf/detail/tdigest/tdigest.hpp
@@ -23,9 +23,8 @@
 #include <rmm/cuda_stream_view.hpp>
 
 namespace cudf {
-namespace detail {
-
 namespace tdigest {
+namespace detail {
 
 /**
  * @brief Generate a tdigest column from a grouped set of numeric input values.
@@ -328,6 +327,6 @@ std::unique_ptr<scalar> reduce_merge_tdigest(column_view const& input,
                                              rmm::cuda_stream_view stream,
                                              rmm::mr::device_memory_resource* mr);
 
-}  // namespace tdigest
 }  // namespace detail
+}  // namespace tdigest
 }  // namespace cudf
diff --git a/cpp/include/cudf/quantiles.hpp b/cpp/include/cudf/quantiles.hpp
index 531c7e3477d..1f3c26fa077 100644
--- a/cpp/include/cudf/quantiles.hpp
+++ b/cpp/include/cudf/quantiles.hpp
@@ -18,7 +18,7 @@
 
 #include <cudf/scalar/scalar.hpp>
 #include <cudf/table/table_view.hpp>
-#include <cudf/tdigest/tdigest_column_view.cuh>
+#include <cudf/tdigest/tdigest_column_view.hpp>
 #include <cudf/types.hpp>
 
 #include <rmm/mr/device/per_device_resource.hpp>
diff --git a/cpp/include/cudf/tdigest/tdigest_column_view.cuh b/cpp/include/cudf/tdigest/tdigest_column_view.hpp
similarity index 82%
rename from cpp/include/cudf/tdigest/tdigest_column_view.cuh
rename to cpp/include/cudf/tdigest/tdigest_column_view.hpp
index 0ffd9578126..c63e2b16326 100644
--- a/cpp/include/cudf/tdigest/tdigest_column_view.cuh
+++ b/cpp/include/cudf/tdigest/tdigest_column_view.hpp
@@ -16,7 +16,6 @@
 #pragma once
 
 #include <cudf/column/column_view.hpp>
-#include <cudf/detail/iterator.cuh>
 #include <cudf/lists/lists_column_view.hpp>
 
 namespace cudf {
@@ -109,34 +108,6 @@ class tdigest_column_view : private column_view {
    */
   [[nodiscard]] column_view weights() const;
 
-  /**
-   * @brief Functor to compute the size of each tdigest of a column.
-   */
-  struct tdigest_size_fn {
-    size_type const* offsets;  ///< Offsets of the t-digest column
-    /**
-     * @brief Returns size of the each tdigest in the column
-     *
-     * @param tdigest_index Index of the tdigest in the column
-     * @return Size of the tdigest
-     */
-    __device__ size_type operator()(size_type tdigest_index)
-    {
-      return offsets[tdigest_index + 1] - offsets[tdigest_index];
-    }
-  };
-  /**
-   * @brief Returns an iterator that returns the size of each tdigest
-   * in the column (each row is 1 digest)
-   *
-   * @return An iterator that returns the size of each tdigest in the column
-   */
-  [[nodiscard]] auto size_begin() const
-  {
-    return cudf::detail::make_counting_transform_iterator(
-      0, tdigest_size_fn{centroids().offsets_begin()});
-  }
-
   /**
    * @brief Returns the first min value for the column. Each row corresponds
    * to the minimum value for the accompanying digest.
diff --git a/cpp/include/cudf_test/tdigest_utilities.cuh b/cpp/include/cudf_test/tdigest_utilities.cuh
index 11a8d8715a0..ce45ad91be1 100644
--- a/cpp/include/cudf_test/tdigest_utilities.cuh
+++ b/cpp/include/cudf_test/tdigest_utilities.cuh
@@ -21,7 +21,7 @@
 #include <cudf/detail/tdigest/tdigest.hpp>
 #include <cudf/detail/unary.hpp>
 #include <cudf/groupby.hpp>
-#include <cudf/tdigest/tdigest_column_view.cuh>
+#include <cudf/tdigest/tdigest_column_view.hpp>
 #include <cudf/utilities/default_stream.hpp>
 
 #include <thrust/device_vector.h>
@@ -267,7 +267,7 @@ void tdigest_simple_all_nulls_aggregation(Func op)
     static_cast<column_view>(values).type(), tdigest_gen{}, op, values, delta);
 
   // NOTE: an empty tdigest column still has 1 row.
-  auto expected = cudf::detail::tdigest::make_empty_tdigest_column(cudf::get_default_stream());
+  auto expected = cudf::tdigest::detail::make_empty_tdigest_column(cudf::get_default_stream());
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, *expected);
 }
@@ -558,9 +558,9 @@ template <typename MergeFunc>
 void tdigest_merge_empty(MergeFunc merge_op)
 {
   // 3 empty tdigests all in the same group
-  auto a = cudf::detail::tdigest::make_empty_tdigest_column(cudf::get_default_stream());
-  auto b = cudf::detail::tdigest::make_empty_tdigest_column(cudf::get_default_stream());
-  auto c = cudf::detail::tdigest::make_empty_tdigest_column(cudf::get_default_stream());
+  auto a = cudf::tdigest::detail::make_empty_tdigest_column(cudf::get_default_stream());
+  auto b = cudf::tdigest::detail::make_empty_tdigest_column(cudf::get_default_stream());
+  auto c = cudf::tdigest::detail::make_empty_tdigest_column(cudf::get_default_stream());
   std::vector<column_view> cols;
   cols.push_back(*a);
   cols.push_back(*b);
@@ -570,7 +570,7 @@ void tdigest_merge_empty(MergeFunc merge_op)
   auto const delta = 1000;
   auto result      = merge_op(*values, delta);
 
-  auto expected = cudf::detail::tdigest::make_empty_tdigest_column(cudf::get_default_stream());
+  auto expected = cudf::tdigest::detail::make_empty_tdigest_column(cudf::get_default_stream());
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected, *result);
 }
diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp
index 55a0b89e446..e3d14f1deb7 100644
--- a/cpp/src/groupby/sort/aggregate.cpp
+++ b/cpp/src/groupby/sort/aggregate.cpp
@@ -701,7 +701,7 @@ void aggregate_result_functor::operator()<aggregation::TDIGEST>(aggregation cons
 
   cache.add_result(values,
                    agg,
-                   cudf::detail::tdigest::group_tdigest(
+                   cudf::tdigest::detail::group_tdigest(
                      get_sorted_values(),
                      helper.group_offsets(stream),
                      helper.group_labels(stream),
@@ -745,7 +745,7 @@ void aggregate_result_functor::operator()<aggregation::MERGE_TDIGEST>(aggregatio
     dynamic_cast<cudf::detail::merge_tdigest_aggregation const&>(agg).max_centroids;
   cache.add_result(values,
                    agg,
-                   cudf::detail::tdigest::group_merge_tdigest(get_grouped_values(),
+                   cudf::tdigest::detail::group_merge_tdigest(get_grouped_values(),
                                                               helper.group_offsets(stream),
                                                               helper.group_labels(stream),
                                                               helper.num_groups(stream),
diff --git a/cpp/src/quantiles/tdigest/tdigest.cu b/cpp/src/quantiles/tdigest/tdigest.cu
index 019809d5f68..0c90b0af8d2 100644
--- a/cpp/src/quantiles/tdigest/tdigest.cu
+++ b/cpp/src/quantiles/tdigest/tdigest.cu
@@ -14,13 +14,14 @@
  * limitations under the License.
  */
 
+#include <quantiles/tdigest/tdigest_util.cuh>
+
 #include <cudf/column/column_factories.hpp>
 #include <cudf/detail/iterator.cuh>
 #include <cudf/detail/tdigest/tdigest.hpp>
 #include <cudf/detail/utilities/cuda.cuh>
 #include <cudf/detail/valid_if.cuh>
 #include <cudf/lists/lists_column_view.hpp>
-#include <cudf/tdigest/tdigest_column_view.cuh>
 #include <cudf/types.hpp>
 #include <cudf/utilities/default_stream.hpp>
 
@@ -42,8 +43,8 @@
 using namespace cudf::tdigest;
 
 namespace cudf {
-namespace detail {
 namespace tdigest {
+namespace detail {
 
 // https://developer.nvidia.com/blog/lerp-faster-cuda/
 template <typename T>
@@ -338,7 +339,7 @@ std::unique_ptr<scalar> make_empty_tdigest_scalar(rmm::cuda_stream_view stream,
     std::move(*std::make_unique<table>(std::move(contents.children))), true, stream, mr);
 }
 
-}  // namespace tdigest
+}  // namespace detail
 
 std::unique_ptr<column> percentile_approx(tdigest_column_view const& input,
                                           column_view const& percentiles,
@@ -354,8 +355,8 @@ std::unique_ptr<column> percentile_approx(tdigest_column_view const& input,
     data_type{type_id::INT32}, input.size() + 1, mask_state::UNALLOCATED, stream, mr);
   auto const all_empty_rows =
     thrust::count_if(rmm::exec_policy(stream),
-                     input.size_begin(),
-                     input.size_begin() + input.size(),
+                     detail::size_begin(input),
+                     detail::size_begin(input) + input.size(),
                      [] __device__(auto const x) { return x == 0; }) == input.size();
   auto row_size_iter = thrust::make_constant_iterator(all_empty_rows ? 0 : percentiles.size());
   thrust::exclusive_scan(rmm::exec_policy(stream),
@@ -379,7 +380,7 @@ std::unique_ptr<column> percentile_approx(tdigest_column_view const& input,
   // uninitialized)
   auto [bitmask, null_count] = [stream, mr, &tdv]() {
     auto tdigest_is_empty = thrust::make_transform_iterator(
-      tdv.size_begin(),
+      detail::size_begin(tdv),
       [] __device__(size_type tdigest_size) -> size_type { return tdigest_size == 0; });
     auto const null_count =
       thrust::reduce(rmm::exec_policy(stream), tdigest_is_empty, tdigest_is_empty + tdv.size(), 0);
@@ -390,24 +391,23 @@ std::unique_ptr<column> percentile_approx(tdigest_column_view const& input,
       tdigest_is_empty, tdigest_is_empty + tdv.size(), thrust::logical_not{}, stream, mr);
   }();
 
-  return cudf::make_lists_column(
-    input.size(),
-    std::move(offsets),
-    tdigest::compute_approx_percentiles(input, percentiles, stream, mr),
-    null_count,
-    std::move(bitmask),
-    stream,
-    mr);
+  return cudf::make_lists_column(input.size(),
+                                 std::move(offsets),
+                                 detail::compute_approx_percentiles(input, percentiles, stream, mr),
+                                 null_count,
+                                 std::move(bitmask),
+                                 stream,
+                                 mr);
 }
 
-}  // namespace detail
+}  // namespace tdigest
 
 std::unique_ptr<column> percentile_approx(tdigest_column_view const& input,
                                           column_view const& percentiles,
                                           rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::percentile_approx(input, percentiles, cudf::get_default_stream(), mr);
+  return tdigest::percentile_approx(input, percentiles, cudf::get_default_stream(), mr);
 }
 
 }  // namespace cudf
diff --git a/cpp/src/quantiles/tdigest/tdigest_aggregation.cu b/cpp/src/quantiles/tdigest/tdigest_aggregation.cu
index d870b73dff4..38c6cf7bd2e 100644
--- a/cpp/src/quantiles/tdigest/tdigest_aggregation.cu
+++ b/cpp/src/quantiles/tdigest/tdigest_aggregation.cu
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+#include <quantiles/tdigest/tdigest_util.cuh>
+
 #include <cudf/column/column_factories.hpp>
 #include <cudf/column/column_view.hpp>
 #include <cudf/copying.hpp>
@@ -26,7 +28,6 @@
 #include <cudf/detail/tdigest/tdigest.hpp>
 #include <cudf/detail/utilities/cuda.cuh>
 #include <cudf/lists/lists_column_view.hpp>
-#include <cudf/tdigest/tdigest_column_view.cuh>
 #include <cudf/utilities/span.hpp>
 
 #include <rmm/device_uvector.hpp>
@@ -52,10 +53,8 @@
 #include <thrust/tuple.h>
 
 namespace cudf {
-namespace detail {
 namespace tdigest {
-
-using namespace cudf::tdigest;
+namespace detail {
 
 namespace {
 
@@ -596,7 +595,7 @@ std::unique_ptr<column> build_output_column(size_type num_rows,
 
   // if there are no stub tdigests, we can return immediately.
   if (num_stubs == 0) {
-    return cudf::detail::tdigest::make_tdigest_column(num_rows,
+    return cudf::tdigest::detail::make_tdigest_column(num_rows,
                                                       std::move(means),
                                                       std::move(weights),
                                                       std::move(offsets),
@@ -642,7 +641,7 @@ std::unique_ptr<column> build_output_column(size_type num_rows,
                          0);
 
   // assemble final column
-  return cudf::detail::tdigest::make_tdigest_column(num_rows,
+  return cudf::tdigest::detail::make_tdigest_column(num_rows,
                                                     std::move(_means),
                                                     std::move(_weights),
                                                     std::move(offsets),
@@ -708,7 +707,7 @@ std::unique_ptr<column> compute_tdigests(int delta,
   //   double       // max
   // }
   //
-  if (total_clusters == 0) { return cudf::detail::tdigest::make_empty_tdigest_column(stream, mr); }
+  if (total_clusters == 0) { return cudf::tdigest::detail::make_empty_tdigest_column(stream, mr); }
 
   // each input group represents an individual tdigest.  within each tdigest, we want the keys
   // to represent cluster indices (for example, if a tdigest had 100 clusters, the keys should fall
@@ -1067,9 +1066,10 @@ std::unique_ptr<column> merge_tdigests(tdigest_column_view const& tdv,
   // generate min and max values
   auto merged_min_col = cudf::make_numeric_column(
     data_type{type_id::FLOAT64}, num_groups, mask_state::UNALLOCATED, stream, mr);
-  auto min_iter = thrust::make_transform_iterator(
-    thrust::make_zip_iterator(thrust::make_tuple(tdv.min_begin(), tdv.size_begin())),
-    tdigest_min{});
+  auto min_iter =
+    thrust::make_transform_iterator(thrust::make_zip_iterator(thrust::make_tuple(
+                                      tdv.min_begin(), cudf::tdigest::detail::size_begin(tdv))),
+                                    tdigest_min{});
   thrust::reduce_by_key(rmm::exec_policy(stream),
                         group_labels,
                         group_labels + num_group_labels,
@@ -1081,9 +1081,10 @@ std::unique_ptr<column> merge_tdigests(tdigest_column_view const& tdv,
 
   auto merged_max_col = cudf::make_numeric_column(
     data_type{type_id::FLOAT64}, num_groups, mask_state::UNALLOCATED, stream, mr);
-  auto max_iter = thrust::make_transform_iterator(
-    thrust::make_zip_iterator(thrust::make_tuple(tdv.max_begin(), tdv.size_begin())),
-    tdigest_max{});
+  auto max_iter =
+    thrust::make_transform_iterator(thrust::make_zip_iterator(thrust::make_tuple(
+                                      tdv.max_begin(), cudf::tdigest::detail::size_begin(tdv))),
+                                    tdigest_max{});
   thrust::reduce_by_key(rmm::exec_policy(stream),
                         group_labels,
                         group_labels + num_group_labels,
@@ -1190,7 +1191,7 @@ std::unique_ptr<scalar> reduce_tdigest(column_view const& col,
                                        rmm::cuda_stream_view stream,
                                        rmm::mr::device_memory_resource* mr)
 {
-  if (col.size() == 0) { return cudf::detail::tdigest::make_empty_tdigest_scalar(stream, mr); }
+  if (col.size() == 0) { return cudf::tdigest::detail::make_empty_tdigest_scalar(stream, mr); }
 
   // since this isn't coming out of a groupby, we need to sort the inputs in ascending
   // order with nulls at the end.
@@ -1209,7 +1210,7 @@ std::unique_ptr<scalar> reduce_merge_tdigest(column_view const& input,
 {
   tdigest_column_view tdv(input);
 
-  if (input.size() == 0) { return cudf::detail::tdigest::make_empty_tdigest_scalar(stream, mr); }
+  if (input.size() == 0) { return cudf::tdigest::detail::make_empty_tdigest_scalar(stream, mr); }
 
   auto h_group_offsets = cudf::detail::make_counting_transform_iterator(
     0, [size = input.size()](size_type i) { return i == 0 ? 0 : size; });
@@ -1238,7 +1239,7 @@ std::unique_ptr<column> group_tdigest(column_view const& col,
                                       rmm::cuda_stream_view stream,
                                       rmm::mr::device_memory_resource* mr)
 {
-  if (col.size() == 0) { return cudf::detail::tdigest::make_empty_tdigest_column(stream, mr); }
+  if (col.size() == 0) { return cudf::tdigest::detail::make_empty_tdigest_column(stream, mr); }
 
   auto const delta = max_centroids;
   return cudf::type_dispatcher(col.type(),
@@ -1264,7 +1265,7 @@ std::unique_ptr<column> group_merge_tdigest(column_view const& input,
   tdigest_column_view tdv(input);
 
   if (num_groups == 0 || input.size() == 0) {
-    return cudf::detail::tdigest::make_empty_tdigest_column(stream, mr);
+    return cudf::tdigest::detail::make_empty_tdigest_column(stream, mr);
   }
 
   // bring group offsets back to the host
@@ -1286,6 +1287,6 @@ std::unique_ptr<column> group_merge_tdigest(column_view const& input,
                         mr);
 }
 
-}  // namespace tdigest
 }  // namespace detail
+}  // namespace tdigest
 }  // namespace cudf
diff --git a/cpp/src/quantiles/tdigest/tdigest_column_view.cpp b/cpp/src/quantiles/tdigest/tdigest_column_view.cpp
index df95c1d9da8..cfcd21c5690 100644
--- a/cpp/src/quantiles/tdigest/tdigest_column_view.cpp
+++ b/cpp/src/quantiles/tdigest/tdigest_column_view.cpp
@@ -17,7 +17,7 @@
 #include <cudf/detail/tdigest/tdigest.hpp>
 #include <cudf/lists/lists_column_view.hpp>
 #include <cudf/structs/structs_column_view.hpp>
-#include <cudf/tdigest/tdigest_column_view.cuh>
+#include <cudf/tdigest/tdigest_column_view.hpp>
 
 namespace cudf {
 namespace tdigest {
diff --git a/cpp/src/quantiles/tdigest/tdigest_util.cuh b/cpp/src/quantiles/tdigest/tdigest_util.cuh
new file mode 100644
index 00000000000..d0e6484875b
--- /dev/null
+++ b/cpp/src/quantiles/tdigest/tdigest_util.cuh
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <cudf/detail/iterator.cuh>
+#include <cudf/tdigest/tdigest_column_view.hpp>
+
+namespace cudf {
+namespace tdigest {
+namespace detail {
+
+/**
+ * @brief Functor to compute the size of each tdigest of a column
+ */
+struct tdigest_size_fn {
+  size_type const* offsets;  ///< Offsets of the t-digest column
+  /**
+   * @brief Returns the size of the tdigest at the given index in the column
+   *
+   * @param tdigest_index Index of the tdigest in the column
+   * @return Size of the tdigest
+   */
+  __device__ size_type operator()(size_type tdigest_index)
+  {
+    return offsets[tdigest_index + 1] - offsets[tdigest_index];
+  }
+};
+
+/**
+ * @brief Returns an iterator that returns the size of each tdigest
+ * in the column (each row is 1 digest)
+ *
+ * @return An iterator that returns the size of each tdigest in the column
+ */
+inline auto size_begin(tdigest_column_view const& tdv)
+{
+  return cudf::detail::make_counting_transform_iterator(
+    0, tdigest_size_fn{tdv.centroids().offsets_begin()});
+}
+
+}  // namespace detail
+}  // namespace tdigest
+}  // namespace cudf
diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp
index d7a195c088c..38db7eb3e89 100644
--- a/cpp/src/reductions/reductions.cpp
+++ b/cpp/src/reductions/reductions.cpp
@@ -124,13 +124,13 @@ struct reduce_dispatch_functor {
         CUDF_EXPECTS(output_dtype.id() == type_id::STRUCT,
                      "Tdigest aggregations expect output type to be STRUCT");
         auto td_agg = static_cast<tdigest_aggregation const&>(agg);
-        return detail::tdigest::reduce_tdigest(col, td_agg.max_centroids, stream, mr);
+        return tdigest::detail::reduce_tdigest(col, td_agg.max_centroids, stream, mr);
       }
       case aggregation::MERGE_TDIGEST: {
         CUDF_EXPECTS(output_dtype.id() == type_id::STRUCT,
                      "Tdigest aggregations expect output type to be STRUCT");
         auto td_agg = static_cast<merge_tdigest_aggregation const&>(agg);
-        return detail::tdigest::reduce_merge_tdigest(col, td_agg.max_centroids, stream, mr);
+        return tdigest::detail::reduce_merge_tdigest(col, td_agg.max_centroids, stream, mr);
       }
       default: CUDF_FAIL("Unsupported reduction operator");
     }
@@ -157,7 +157,7 @@ std::unique_ptr<scalar> reduce(
   // handcraft the default scalar with input column.
   if (col.size() <= col.null_count()) {
     if (agg.kind == aggregation::TDIGEST || agg.kind == aggregation::MERGE_TDIGEST) {
-      return detail::tdigest::make_empty_tdigest_scalar(stream);
+      return tdigest::detail::make_empty_tdigest_scalar(stream);
     }
     if (col.type().id() == type_id::EMPTY || col.type() != output_dtype) {
       // Under some circumstance, the output type will become the List of input type,
diff --git a/cpp/tests/groupby/tdigest_tests.cu b/cpp/tests/groupby/tdigest_tests.cu
index 2e4a41a70f8..70b0851c814 100644
--- a/cpp/tests/groupby/tdigest_tests.cu
+++ b/cpp/tests/groupby/tdigest_tests.cu
@@ -17,7 +17,7 @@
 #include <cudf/detail/aggregation/aggregation.hpp>
 #include <cudf/detail/tdigest/tdigest.hpp>
 #include <cudf/lists/lists_column_view.hpp>
-#include <cudf/tdigest/tdigest_column_view.cuh>
+#include <cudf/tdigest/tdigest_column_view.hpp>
 #include <cudf/utilities/default_stream.hpp>
 
 #include <cudf_test/base_fixture.hpp>
@@ -466,13 +466,13 @@ TEST_F(TDigestMergeTest, EmptyGroups)
   cudf::test::fixed_width_column_wrapper<int> keys{0, 0, 0, 0, 0, 0, 0};
   int const delta = 1000;
 
-  auto a = cudf::detail::tdigest::make_empty_tdigest_column(cudf::get_default_stream());
+  auto a = cudf::tdigest::detail::make_empty_tdigest_column(cudf::get_default_stream());
   auto b = cudf::type_dispatcher(
     static_cast<column_view>(values_b).type(), tdigest_gen_grouped{}, keys, values_b, delta);
-  auto c = cudf::detail::tdigest::make_empty_tdigest_column(cudf::get_default_stream());
+  auto c = cudf::tdigest::detail::make_empty_tdigest_column(cudf::get_default_stream());
   auto d = cudf::type_dispatcher(
     static_cast<column_view>(values_d).type(), tdigest_gen_grouped{}, keys, values_d, delta);
-  auto e = cudf::detail::tdigest::make_empty_tdigest_column(cudf::get_default_stream());
+  auto e = cudf::tdigest::detail::make_empty_tdigest_column(cudf::get_default_stream());
 
   std::vector<column_view> cols;
   cols.push_back(*a);
diff --git a/cpp/tests/quantiles/percentile_approx_test.cpp b/cpp/tests/quantiles/percentile_approx_test.cpp
index c7db8894a23..2840d275d4d 100644
--- a/cpp/tests/quantiles/percentile_approx_test.cpp
+++ b/cpp/tests/quantiles/percentile_approx_test.cpp
@@ -25,7 +25,7 @@
 #include <cudf/quantiles.hpp>
 #include <cudf/reduction.hpp>
 #include <cudf/sorting.hpp>
-#include <cudf/tdigest/tdigest_column_view.cuh>
+#include <cudf/tdigest/tdigest_column_view.hpp>
 #include <cudf/transform.hpp>
 #include <cudf/utilities/default_stream.hpp>
 #include <cudf/utilities/error.hpp>
@@ -373,7 +373,7 @@ struct PercentileApproxTest : public cudf::test::BaseFixture {
 
 TEST_F(PercentileApproxTest, EmptyInput)
 {
-  auto empty_ = cudf::detail::tdigest::make_empty_tdigest_column(cudf::get_default_stream());
+  auto empty_ = cudf::tdigest::detail::make_empty_tdigest_column(cudf::get_default_stream());
   cudf::test::fixed_width_column_wrapper<double> percentiles{0.0, 0.25, 0.3};
 
   std::vector<cudf::column_view> input;
diff --git a/cpp/tests/utilities/tdigest_utilities.cu b/cpp/tests/utilities/tdigest_utilities.cu
index 68147dc29eb..beed9893d71 100644
--- a/cpp/tests/utilities/tdigest_utilities.cu
+++ b/cpp/tests/utilities/tdigest_utilities.cu
@@ -17,7 +17,7 @@
 #include <cudf/column/column_factories.hpp>
 #include <cudf/concatenate.hpp>
 #include <cudf/detail/tdigest/tdigest.hpp>
-#include <cudf/tdigest/tdigest_column_view.cuh>
+#include <cudf/tdigest/tdigest_column_view.hpp>
 #include <cudf/utilities/default_stream.hpp>
 
 #include <cudf_test/column_utilities.hpp>
diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp
index f52d3201a10..9d442772261 100644
--- a/java/src/main/native/src/ColumnViewJni.cpp
+++ b/java/src/main/native/src/ColumnViewJni.cpp
@@ -69,7 +69,7 @@
 #include <cudf/strings/strip.hpp>
 #include <cudf/strings/substring.hpp>
 #include <cudf/structs/structs_column_view.hpp>
-#include <cudf/tdigest/tdigest_column_view.cuh>
+#include <cudf/tdigest/tdigest_column_view.hpp>
 #include <cudf/transform.hpp>
 #include <cudf/types.hpp>
 #include <cudf/unary.hpp>

From 74053f4c5214fbf5ae297b838d64e8055a23a02e Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Wed, 9 Nov 2022 13:52:23 -0500
Subject: [PATCH 143/202] Add regex_program class for use with all regex APIs
 (#11927)

Adds a new `regex_program` class to encapsulate a regex pattern and parameters used for executing regex calls on strings columns in libcudf. This provides a single object to hold the regex settings rather than adding or updating parameters to every call. Given a pattern (and other settings), it will _compile_ and validate the pattern and build the set of instructions/commands needed to execute the regex on a strings column. Converting the pattern is done in CPU code. The object contains no state data and can be reused on the same API or other similar calls as appropriate (per the settings).
The object can also be queried to help with resource allocation/expectations.

The main files to review are the new `regex_program*` source files plus the corresponding changes in `regexec.cpp` (renamed from .cu). The remainder are simply side-effects and have common patterns to use the new object.
No function or behavior has changed; rather, a new interface has been added over the existing functionality, and additional tests have been added to exercise it through the companion APIs.

Currently, all regex APIs are duplicated -- the original API plus a new one accepting a `regex_program` object. Once accepted, we may consider deprecating the non-object APIs and then removing them in a future release.

This will help with changes needed for #10852

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Robert Maynard (https://github.com/robertmaynard)
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Bradley Dice (https://github.com/bdice)
  - Ray Douglass (https://github.com/raydouglass)

URL: https://github.com/rapidsai/cudf/pull/11927
---
 conda/recipes/libcudf/meta.yaml               |   1 +
 cpp/CMakeLists.txt                            |   3 +-
 cpp/include/cudf/strings/contains.hpp         |  81 ++++
 cpp/include/cudf/strings/extract.hpp          |  68 ++++
 cpp/include/cudf/strings/findall.hpp          |  36 ++
 cpp/include/cudf/strings/regex/flags.hpp      |   2 +-
 .../cudf/strings/regex/regex_program.hpp      | 138 +++++++
 cpp/include/cudf/strings/replace_re.hpp       |  50 +++
 cpp/include/cudf/strings/split/split_re.hpp   | 223 +++++++++++
 cpp/include/doxygen_groups.h                  |   1 +
 cpp/src/strings/contains.cu                   |  56 ++-
 cpp/src/strings/extract/extract.cu            |  19 +-
 cpp/src/strings/extract/extract_all.cu        |  20 +-
 cpp/src/strings/regex/regcomp.cpp             |   2 +-
 cpp/src/strings/regex/regcomp.h               |   2 +-
 cpp/src/strings/regex/regex.cuh               |  40 +-
 cpp/src/strings/regex/regex_program.cpp       |  63 +++
 cpp/src/strings/regex/regex_program_impl.h    |  50 +++
 .../strings/regex/{regexec.cu => regexec.cpp} |  25 +-
 cpp/src/strings/replace/backref_re.cu         |  22 +-
 cpp/src/strings/replace/multi_re.cu           |   4 +-
 cpp/src/strings/replace/replace_re.cu         |  22 +-
 cpp/src/strings/search/findall.cu             |  19 +-
 cpp/src/strings/split/split_re.cu             |  84 +++-
 cpp/tests/strings/contains_tests.cpp          | 375 +++++++++++++-----
 cpp/tests/strings/extract_tests.cpp           |  66 ++-
 cpp/tests/strings/findall_tests.cpp           |  20 +-
 cpp/tests/strings/replace_regex_tests.cpp     | 240 +++++++----
 cpp/tests/strings/split_tests.cpp             | 110 +++--
 29 files changed, 1508 insertions(+), 334 deletions(-)
 create mode 100644 cpp/include/cudf/strings/regex/regex_program.hpp
 create mode 100644 cpp/src/strings/regex/regex_program.cpp
 create mode 100644 cpp/src/strings/regex/regex_program_impl.h
 rename cpp/src/strings/regex/{regexec.cu => regexec.cpp} (90%)

diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml
index e4697206d4d..15d2fcc2a36 100644
--- a/conda/recipes/libcudf/meta.yaml
+++ b/conda/recipes/libcudf/meta.yaml
@@ -236,6 +236,7 @@ outputs:
         - test -f $PREFIX/include/cudf/strings/json.hpp
         - test -f $PREFIX/include/cudf/strings/padding.hpp
         - test -f $PREFIX/include/cudf/strings/regex/flags.hpp
+        - test -f $PREFIX/include/cudf/strings/regex/regex_program.hpp
         - test -f $PREFIX/include/cudf/strings/repeat_strings.hpp
         - test -f $PREFIX/include/cudf/strings/replace.hpp
         - test -f $PREFIX/include/cudf/strings/replace_re.hpp
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index e13b1747a7e..a71eeb7cfbe 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -501,7 +501,8 @@ add_library(
   src/strings/padding.cu
   src/strings/json/json_path.cu
   src/strings/regex/regcomp.cpp
-  src/strings/regex/regexec.cu
+  src/strings/regex/regexec.cpp
+  src/strings/regex/regex_program.cpp
   src/strings/repeat_strings.cu
   src/strings/replace/backref_re.cu
   src/strings/replace/multi_re.cu
diff --git a/cpp/include/cudf/strings/contains.hpp b/cpp/include/cudf/strings/contains.hpp
index d95dc2c418c..1718d205871 100644
--- a/cpp/include/cudf/strings/contains.hpp
+++ b/cpp/include/cudf/strings/contains.hpp
@@ -24,6 +24,9 @@
 
 namespace cudf {
 namespace strings {
+
+struct regex_program;
+
 /**
  * @addtogroup strings_contains
  * @{
@@ -58,6 +61,32 @@ std::unique_ptr<column> contains_re(
   regex_flags const flags             = regex_flags::DEFAULT,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+/**
+ * @brief Returns a boolean column identifying rows which
+ * match the given regex_program object
+ *
+ * @code{.pseudo}
+ * Example:
+ * s = ["abc", "123", "def456"]
+ * p = regex_program::create("\\d+")
+ * r = contains_re(s, p)
+ * r is now [false, true, true]
+ * @endcode
+ *
+ * Any null string entries return corresponding null output column entries.
+ *
+ * See the @ref md_regex "Regex Features" page for details on patterns supported by this API.
+ *
+ * @param strings Strings instance for this operation
+ * @param prog Regex program instance
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return New column of boolean results for each string
+ */
+std::unique_ptr<column> contains_re(
+  strings_column_view const& strings,
+  regex_program const& prog,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 /**
  * @brief Returns a boolean column identifying rows which
  * matching the given regex pattern but only at the beginning the string.
@@ -85,6 +114,32 @@ std::unique_ptr<column> matches_re(
   regex_flags const flags             = regex_flags::DEFAULT,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+/**
+ * @brief Returns a boolean column identifying rows which
+ * match the given regex_program object but only at the beginning of the string.
+ *
+ * @code{.pseudo}
+ * Example:
+ * s = ["abc", "123", "def456"]
+ * p = regex_program::create("\\d+")
+ * r = matches_re(s, p)
+ * r is now [false, true, false]
+ * @endcode
+ *
+ * Any null string entries return corresponding null output column entries.
+ *
+ * See the @ref md_regex "Regex Features" page for details on patterns supported by this API.
+ *
+ * @param strings Strings instance for this operation
+ * @param prog Regex program instance
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return New column of boolean results for each string
+ */
+std::unique_ptr<column> matches_re(
+  strings_column_view const& strings,
+  regex_program const& prog,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 /**
  * @brief Returns the number of times the given regex pattern
  * matches in each string.
@@ -112,6 +167,32 @@ std::unique_ptr<column> count_re(
   regex_flags const flags             = regex_flags::DEFAULT,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+/**
+ * @brief Returns the number of times the given regex_program's pattern
+ * matches in each string
+ *
+ * @code{.pseudo}
+ * Example:
+ * s = ["abc", "123", "def45"]
+ * p = regex_program::create("\\d")
+ * r = count_re(s, p)
+ * r is now [0, 3, 2]
+ * @endcode
+ *
+ * Any null string entries return corresponding null output column entries.
+ *
+ * See the @ref md_regex "Regex Features" page for details on patterns supported by this API.
+ *
+ * @param strings Strings instance for this operation
+ * @param prog Regex program instance
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return New INT32 column with counts for each string
+ */
+std::unique_ptr<column> count_re(
+  strings_column_view const& strings,
+  regex_program const& prog,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 /**
  * @brief Returns a boolean column identifying rows which
  * match the given like pattern.
diff --git a/cpp/include/cudf/strings/extract.hpp b/cpp/include/cudf/strings/extract.hpp
index a30098bedb9..a80d971438d 100644
--- a/cpp/include/cudf/strings/extract.hpp
+++ b/cpp/include/cudf/strings/extract.hpp
@@ -23,6 +23,9 @@
 
 namespace cudf {
 namespace strings {
+
+struct regex_program;
+
 /**
  * @addtogroup strings_substring
  * @{
@@ -61,6 +64,37 @@ std::unique_ptr<table> extract(
   regex_flags const flags             = regex_flags::DEFAULT,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+/**
+ * @brief Returns a table of strings columns where each column corresponds to the matching
+ * group specified in the given regex_program object
+ *
+ * All the strings for the first group will go in the first output column; the second group
+ * go in the second column and so on. Null entries are added to the columns in row `i` if
+ * the string at row `i` does not match.
+ *
+ * Any null string entries return corresponding null output column entries.
+ *
+ * @code{.pseudo}
+ * Example:
+ * s = ["a1", "b2", "c3"]
+ * p = regex_program::create("([ab])(\\d)")
+ * r = extract(s, p)
+ * r is now [ ["a", "b", null],
+ *            ["1", "2", null] ]
+ * @endcode
+ *
+ * See the @ref md_regex "Regex Features" page for details on patterns supported by this API.
+ *
+ * @param strings Strings instance for this operation
+ * @param prog Regex program instance
+ * @param mr Device memory resource used to allocate the returned table's device memory
+ * @return Columns of strings extracted from the input column
+ */
+std::unique_ptr<table> extract(
+  strings_column_view const& strings,
+  regex_program const& prog,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 /**
  * @brief Returns a lists column of strings where each string column row corresponds to the
  * matching group specified in the given regular expression pattern.
@@ -96,6 +130,40 @@ std::unique_ptr<column> extract_all_record(
   regex_flags const flags             = regex_flags::DEFAULT,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+/**
+ * @brief Returns a lists column of strings where each string column row corresponds to the
+ * matching group specified in the given regex_program object
+ *
+ * All the matching groups for the first row will go in the first row output column; the second
+ * row results will go into the second row output column and so on.
+ *
+ * A null output row will result if the corresponding input string row does not match or
+ * that input row is null.
+ *
+ * @code{.pseudo}
+ * Example:
+ * s = ["a1 b4", "b2", "c3 a5", "b", null]
+ * p = regex_program::create("([ab])(\\d)")
+ * r = extract_all_record(s, p)
+ * r is now [ ["a", "1", "b", "4"],
+ *            ["b", "2"],
+ *            ["a", "5"],
+ *            null,
+ *            null ]
+ * @endcode
+ *
+ * See the @ref md_regex "Regex Features" page for details on patterns supported by this API.
+ *
+ * @param strings Strings instance for this operation
+ * @param prog Regex program instance
+ * @param mr Device memory resource used to allocate any returned device memory
+ * @return Lists column containing strings extracted from the input column
+ */
+std::unique_ptr<column> extract_all_record(
+  strings_column_view const& strings,
+  regex_program const& prog,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 /** @} */  // end of doxygen group
 }  // namespace strings
 }  // namespace cudf
diff --git a/cpp/include/cudf/strings/findall.hpp b/cpp/include/cudf/strings/findall.hpp
index 6969ba35b1b..366e1eb0482 100644
--- a/cpp/include/cudf/strings/findall.hpp
+++ b/cpp/include/cudf/strings/findall.hpp
@@ -23,6 +23,9 @@
 
 namespace cudf {
 namespace strings {
+
+struct regex_program;
+
 /**
  * @addtogroup strings_contains
  * @{
@@ -63,6 +66,39 @@ std::unique_ptr<column> findall(
   regex_flags const flags             = regex_flags::DEFAULT,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+/**
+ * @brief Returns a lists column of strings for each matching occurrence using
+ * the regex_program pattern within each string
+ *
+ * Each output row includes all the substrings within the corresponding input row
+ * that match the given pattern. If no matches are found, the output row is empty.
+ *
+ * @code{.pseudo}
+ * Example:
+ * s = ["bunny", "rabbit", "hare", "dog"]
+ * p = regex_program::create("[ab]")
+ * r = findall(s, p)
+ * r is now a lists column like:
+ *  [ ["b"]
+ *    ["a","b","b"]
+ *    ["a"]
+ *    [] ]
+ * @endcode
+ *
+ * A null output row occurs if the corresponding input row is null.
+ *
+ * See the @ref md_regex "Regex Features" page for details on patterns supported by this API.
+ *
+ * @param input Strings instance for this operation
+ * @param prog Regex program instance
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return New lists column of strings
+ */
+std::unique_ptr<column> findall(
+  strings_column_view const& input,
+  regex_program const& prog,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 /** @} */  // end of doxygen group
 }  // namespace strings
 }  // namespace cudf
diff --git a/cpp/include/cudf/strings/regex/flags.hpp b/cpp/include/cudf/strings/regex/flags.hpp
index 3a7051345fa..44ca68439e7 100644
--- a/cpp/include/cudf/strings/regex/flags.hpp
+++ b/cpp/include/cudf/strings/regex/flags.hpp
@@ -21,7 +21,7 @@ namespace cudf {
 namespace strings {
 
 /**
- * @addtogroup strings_contains
+ * @addtogroup strings_regex
  * @{
  */
 
diff --git a/cpp/include/cudf/strings/regex/regex_program.hpp b/cpp/include/cudf/strings/regex/regex_program.hpp
new file mode 100644
index 00000000000..2b606393719
--- /dev/null
+++ b/cpp/include/cudf/strings/regex/regex_program.hpp
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <cudf/strings/regex/flags.hpp>
+#include <cudf/types.hpp>
+
+#include <memory>
+#include <string>
+
+namespace cudf {
+namespace strings {
+
+/**
+ * @addtogroup strings_regex
+ * @{
+ */
+
+/**
+ * @brief Regex program class
+ *
+ * Create an instance from a regex pattern and use it to call the appropriate
+ * strings APIs. An instance can be reused.
+ *
+ * See the @ref md_regex "Regex Features" page for details on patterns and APIs that support regex.
+ */
+struct regex_program {
+  struct regex_program_impl;
+
+  /**
+   * @brief Create a program from a pattern
+   *
+   * @throw cudf::logic_error If pattern is invalid or contains unsupported features
+   *
+   * @param pattern Regex pattern
+   * @param flags Regex flags for interpreting special characters in the pattern
+   * @param capture Controls how capture groups in the pattern are used
+   * @return Instance of this object
+   */
+  static std::unique_ptr<regex_program> create(std::string_view pattern,
+                                               regex_flags flags      = regex_flags::DEFAULT,
+                                               capture_groups capture = capture_groups::EXTRACT);
+
+  /**
+   * @brief Move constructor
+   *
+   * @param other Object to move from
+   */
+  regex_program(regex_program&& other);
+
+  /**
+   * @brief Move operator assignment
+   *
+   * @param other Object to move from
+   * @return this object
+   */
+  regex_program& operator=(regex_program&& other);
+
+  /**
+   * @brief Return the pattern used to create this instance
+   *
+   * @return regex pattern as a string
+   */
+  std::string pattern() const;
+
+  /**
+   * @brief Return the regex_flags used to create this instance
+   *
+   * @return regex flags setting
+   */
+  regex_flags flags() const;
+
+  /**
+   * @brief Return the capture_groups used to create this instance
+   *
+   * @return capture groups setting
+   */
+  capture_groups capture() const;
+
+  /**
+   * @brief Return the number of instructions in this instance
+   *
+   * @return Number of instructions
+   */
+  int32_t instructions_count() const;
+
+  /**
+   * @brief Return the number of capture groups in this instance
+   *
+   * @return Number of groups
+   */
+  int32_t groups_count() const;
+
+  /**
+   * @brief Compute the working memory size for the given number of strings
+   *
+   * @param num_strings Number of strings for computation
+   * @return Size of the working memory in bytes
+   */
+  std::size_t compute_working_memory_size(int32_t num_strings) const;
+
+  ~regex_program();
+
+ private:
+  regex_program() = delete;
+
+  std::string _pattern;
+  regex_flags _flags;
+  capture_groups _capture;
+
+  std::unique_ptr<regex_program_impl> _impl;
+
+  /**
+   * @brief Constructor
+   *
+   * Called by create()
+   */
+  regex_program(std::string_view pattern, regex_flags flags, capture_groups capture);
+
+  friend struct regex_device_builder;
+};
+
+/** @} */  // end of doxygen group
+}  // namespace strings
+}  // namespace cudf
diff --git a/cpp/include/cudf/strings/replace_re.hpp b/cpp/include/cudf/strings/replace_re.hpp
index d80b9a89b81..60c66956fb8 100644
--- a/cpp/include/cudf/strings/replace_re.hpp
+++ b/cpp/include/cudf/strings/replace_re.hpp
@@ -26,6 +26,9 @@
 
 namespace cudf {
 namespace strings {
+
+struct regex_program;
+
 /**
  * @addtogroup strings_replace
  * @{
@@ -58,6 +61,30 @@ std::unique_ptr<column> replace_re(
   regex_flags const flags                    = regex_flags::DEFAULT,
   rmm::mr::device_memory_resource* mr        = rmm::mr::get_current_device_resource());
 
+/**
+ * @brief For each string, replaces any character sequence matching the given regex
+ * with the provided replacement string.
+ *
+ * Any null string entries return corresponding null output column entries.
+ *
+ * See the @ref md_regex "Regex Features" page for details on patterns supported by this API.
+ *
+ * @param strings Strings instance for this operation
+ * @param prog Regex program instance
+ * @param replacement The string used to replace the matched sequence in each string.
+ *        Default is an empty string.
+ * @param max_replace_count The maximum number of times to replace the matched pattern
+ *        within each string. Default replaces every substring that is matched.
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return New strings column
+ */
+std::unique_ptr<column> replace_re(
+  strings_column_view const& strings,
+  regex_program const& prog,
+  string_scalar const& replacement           = string_scalar(""),
+  std::optional<size_type> max_replace_count = std::nullopt,
+  rmm::mr::device_memory_resource* mr        = rmm::mr::get_current_device_resource());
+
 /**
  * @brief For each string, replaces any character sequence matching the given patterns
  * with the corresponding string in the `replacements` column.
@@ -105,5 +132,28 @@ std::unique_ptr<column> replace_with_backrefs(
   regex_flags const flags             = regex_flags::DEFAULT,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+/**
+ * @brief For each string, replaces any character sequence matching the given regex
+ * using the replacement template for back-references.
+ *
+ * Any null string entries return corresponding null output column entries.
+ *
+ * See the @ref md_regex "Regex Features" page for details on patterns supported by this API.
+ *
+ * @throw cudf::logic_error if capture index values in `replacement` are not in range 0-99, and also
+ * if the index exceeds the group count specified in the pattern
+ *
+ * @param strings Strings instance for this operation
+ * @param prog Regex program instance
+ * @param replacement The replacement template for creating the output string
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return New strings column
+ */
+std::unique_ptr<column> replace_with_backrefs(
+  strings_column_view const& strings,
+  regex_program const& prog,
+  std::string_view replacement,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 }  // namespace strings
 }  // namespace cudf
diff --git a/cpp/include/cudf/strings/split/split_re.hpp b/cpp/include/cudf/strings/split/split_re.hpp
index 6fe07b0f5dc..c6bd1345ae6 100644
--- a/cpp/include/cudf/strings/split/split_re.hpp
+++ b/cpp/include/cudf/strings/split/split_re.hpp
@@ -23,6 +23,9 @@
 
 namespace cudf {
 namespace strings {
+
+struct regex_program;
+
 /**
  * @addtogroup strings_split
  * @{
@@ -77,6 +80,58 @@ std::unique_ptr<table> split_re(
   size_type maxsplit                  = -1,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+/**
+ * @brief Splits strings elements into a table of strings columns
+ * using a regex_program's pattern to delimit each string
+ *
+ * Each element generates a vector of strings that are stored in corresponding
+ * rows in the output table -- `table[col,row] = token[col] of strings[row]`
+ * where `token` is a substring between delimiters.
+ *
+ * The number of rows in the output table will be the same as the number of
+ * elements in the input column. The resulting number of columns will be the
+ * maximum number of tokens found in any input row.
+ *
+ * The `pattern` is used to identify the delimiters within a string
+ * and splitting stops when either `maxsplit` or the end of the string is reached.
+ *
+ * An empty input string will produce a corresponding empty string in the
+ * corresponding row of the first column.
+ * A null row will produce corresponding null rows in the output table.
+ *
+ * The regex_program's regex_flags are ignored.
+ *
+ * @code{.pseudo}
+ * s = ["a_bc def_g", "a__bc", "_ab cd", "ab_cd "]
+ * p1 = regex_program::create("[_ ]")
+ * s1 = split_re(s, p1)
+ * s1 is a table of strings columns:
+ *     [ ["a", "a", "", "ab"],
+ *       ["bc", "", "ab", "cd"],
+ *       ["def", "bc", "cd", ""],
+ *       ["g", null, null, null] ]
+ * p2 = regex_program::create("[ _]")
+ * s2 = split_re(s, p2, 1)
+ * s2 is a table of strings columns:
+ *     [ ["a", "a", "", "ab"],
+ *       ["bc def_g", "_bc", "ab cd", "cd "] ]
+ * @endcode
+ *
+ * @throw cudf::logic_error if the pattern used to create `prog` is empty.
+ *
+ * @param input A column of string elements to be split
+ * @param prog Regex program instance
+ * @param maxsplit Maximum number of splits to perform.
+ *        Default of -1 indicates all possible splits on each string.
+ * @param mr Device memory resource used to allocate the returned result's device memory
+ * @return A table of columns of strings
+ */
+std::unique_ptr<table> split_re(
+  strings_column_view const& input,
+  regex_program const& prog,
+  size_type maxsplit                  = -1,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 /**
  * @brief Splits strings elements into a table of strings columns
  * using a regex pattern to delimit each string starting from the end of the string.
@@ -127,6 +182,60 @@ std::unique_ptr<table> rsplit_re(
   size_type maxsplit                  = -1,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+/**
+ * @brief Splits strings elements into a table of strings columns using a
+ * regex_program's pattern to delimit each string starting from the end of the string
+ *
+ * Each element generates a vector of strings that are stored in corresponding
+ * rows in the output table -- `table[col,row] = token[col] of string[row]`
+ * where `token` is the substring between each delimiter.
+ *
+ * The number of rows in the output table will be the same as the number of
+ * elements in the input column. The resulting number of columns will be the
+ * maximum number of tokens found in any input row.
+ *
+ * Splitting occurs by traversing starting from the end of the input string.
+ * The `pattern` is used to identify the delimiters within a string
+ * and splitting stops when either `maxsplit` or the beginning of the string
+ * is reached.
+ *
+ * An empty input string will produce a corresponding empty string in the
+ * corresponding row of the first column.
+ * A null row will produce corresponding null rows in the output table.
+ *
+ * The regex_program's regex_flags are ignored.
+ *
+ * @code{.pseudo}
+ * s = ["a_bc def_g", "a__bc", "_ab cd", "ab_cd "]
+ * p1 = regex_program::create("[_ ]")
+ * s1 = rsplit_re(s, p1)
+ * s1 is a table of strings columns:
+ *     [ ["a", "a", "", "ab"],
+ *       ["bc", "", "ab", "cd"],
+ *       ["def", "bc", "cd", ""],
+ *       ["g", null, null, null] ]
+ * p2 = regex_program::create("[ _]")
+ * s2 = rsplit_re(s, p2, 1)
+ * s2 is a table of strings columns:
+ *     [ ["a_bc def", "a_", "_ab", "ab_cd"],
+ *       ["g", "bc", "cd", ""] ]
+ * @endcode
+ *
+ * @throw cudf::logic_error if the pattern used to create `prog` is empty.
+ *
+ * @param input A column of string elements to be split.
+ * @param prog Regex program instance
+ * @param maxsplit Maximum number of splits to perform.
+ *        Default of -1 indicates all possible splits on each string.
+ * @param mr Device memory resource used to allocate the returned result's device memory.
+ * @return A table of columns of strings.
+ */
+std::unique_ptr<table> rsplit_re(
+  strings_column_view const& input,
+  regex_program const& prog,
+  size_type maxsplit                  = -1,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 /**
  * @brief Splits strings elements into a list column of strings
  * using the given regex pattern to delimit each string.
@@ -179,6 +288,62 @@ std::unique_ptr<column> split_record_re(
   size_type maxsplit                  = -1,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+/**
+ * @brief Splits strings elements into a list column of strings
+ * using the given regex_program to delimit each string
+ *
+ * Each element generates an array of strings that are stored in an output
+ * lists column -- `list[row] = [token1, token2, ...] found in input[row]`
+ * where `token` is a substring between delimiters.
+ *
+ * The number of elements in the output column will be the same as the number of
+ * elements in the input column. Each individual list item will contain the
+ * new strings for that row. The resulting number of strings in each row can vary
+ * from 0 to `maxsplit + 1`.
+ *
+ * The `pattern` is used to identify the delimiters within a string
+ * and splitting stops when either `maxsplit` or the end of the string is reached.
+ *
+ * An empty input string will produce a corresponding empty list item output row.
+ * A null row will produce a corresponding null output row.
+ *
+ * The regex_program's regex_flags are ignored.
+ *
+ * @code{.pseudo}
+ * s = ["a_bc def_g", "a__bc", "_ab cd", "ab_cd "]
+ * p1 = regex_program::create("[_ ]")
+ * s1 = split_record_re(s, p1)
+ * s1 is a lists column of strings:
+ *     [ ["a", "bc", "def", "g"],
+ *       ["a", "", "bc"],
+ *       ["", "ab", "cd"],
+ *       ["ab", "cd", ""] ]
+ * p2 = regex_program::create("[ _]")
+ * s2 = split_record_re(s, p2, 1)
+ * s2 is a lists column of strings:
+ *     [ ["a", "bc def_g"],
+ *       ["a", "_bc"],
+ *       ["", "ab cd"],
+ *       ["ab", "cd "] ]
+ * @endcode
+ *
+ * @throw cudf::logic_error if the pattern used to create `prog` is empty.
+ *
+ * See the @ref md_regex "Regex Features" page for details on patterns supported by this API.
+ *
+ * @param input A column of string elements to be split
+ * @param prog Regex program instance
+ * @param maxsplit Maximum number of splits to perform.
+ *        Default of -1 indicates all possible splits on each string.
+ * @param mr Device memory resource used to allocate the returned result's device memory
+ * @return Lists column of strings.
+ */
+std::unique_ptr<column> split_record_re(
+  strings_column_view const& input,
+  regex_program const& prog,
+  size_type maxsplit                  = -1,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 /**
  * @brief Splits strings elements into a list column of strings
  * using the given regex pattern to delimit each string starting from the end of the string.
@@ -233,6 +398,64 @@ std::unique_ptr<column> rsplit_record_re(
   size_type maxsplit                  = -1,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+/**
+ * @brief Splits strings elements into a list column of strings using the given
+ * regex_program to delimit each string starting from the end of the string
+ *
+ * Each element generates a vector of strings that are stored in an output
+ * lists column -- `list[row] = [token1, token2, ...] found in input[row]`
+ * where `token` is a substring between delimiters.
+ *
+ * The number of elements in the output column will be the same as the number of
+ * elements in the input column. Each individual list item will contain the
+ * new strings for that row. The resulting number of strings in each row can vary
+ * from 0 to `maxsplit + 1`.
+ *
+ * Splitting occurs by traversing starting from the end of the input string.
+ * The `pattern` is used to identify the separation points within a string
+ * and splitting stops when either `maxsplit` or the beginning of the string
+ * is reached.
+ *
+ * An empty input string will produce a corresponding empty list item output row.
+ * A null row will produce a corresponding null output row.
+ *
+ * The regex_program's regex_flags are ignored.
+ *
+ * @code{.pseudo}
+ * s = ["a_bc def_g", "a__bc", "_ab cd", "ab_cd "]
+ * p1 = regex_program::create("[_ ]")
+ * s1 = rsplit_record_re(s, p1)
+ * s1 is a lists column of strings:
+ *     [ ["a", "bc", "def", "g"],
+ *       ["a", "", "bc"],
+ *       ["", "ab", "cd"],
+ *       ["ab", "cd", ""] ]
+ * p2 = regex_program::create("[ _]")
+ * s2 = rsplit_record_re(s, p2, 1)
+ * s2 is a lists column of strings:
+ *     [ ["a_bc def", "g"],
+ *       ["a_", "bc"],
+ *       ["_ab", "cd"],
+ *       ["ab_cd", ""] ]
+ * @endcode
+ *
+ * See the @ref md_regex "Regex Features" page for details on patterns supported by this API.
+ *
+ * @throw cudf::logic_error if the pattern used to create `prog` is empty.
+ *
+ * @param input A column of string elements to be split
+ * @param prog Regex program instance
+ * @param maxsplit Maximum number of splits to perform.
+ *        Default of -1 indicates all possible splits on each string.
+ * @param mr Device memory resource used to allocate the returned result's device memory
+ * @return Lists column of strings
+ */
+std::unique_ptr<column> rsplit_record_re(
+  strings_column_view const& input,
+  regex_program const& prog,
+  size_type maxsplit                  = -1,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 /** @} */  // end of doxygen group
 }  // namespace strings
 }  // namespace cudf
diff --git a/cpp/include/doxygen_groups.h b/cpp/include/doxygen_groups.h
index c0ea06959b2..5c335b720d5 100644
--- a/cpp/include/doxygen_groups.h
+++ b/cpp/include/doxygen_groups.h
@@ -129,6 +129,7 @@
  *   @defgroup strings_replace Replacing
  *   @defgroup strings_split Splitting
  *   @defgroup strings_json JSON
+ *   @defgroup strings_regex Regex
  * @}
  * @defgroup dictionary_apis Dictionary
  * @{
diff --git a/cpp/src/strings/contains.cu b/cpp/src/strings/contains.cu
index 80941990610..eafc78be8da 100644
--- a/cpp/src/strings/contains.cu
+++ b/cpp/src/strings/contains.cu
@@ -15,6 +15,7 @@
  */
 
 #include <strings/count_matches.hpp>
+#include <strings/regex/regex_program_impl.h>
 #include <strings/regex/utilities.cuh>
 
 #include <cudf/column/column.hpp>
@@ -57,8 +58,7 @@ struct contains_fn {
 };
 
 std::unique_ptr<column> contains_impl(strings_column_view const& input,
-                                      std::string_view pattern,
-                                      regex_flags const flags,
+                                      regex_program const& prog,
                                       bool const beginning_only,
                                       rmm::cuda_stream_view stream,
                                       rmm::mr::device_memory_resource* mr)
@@ -71,7 +71,7 @@ std::unique_ptr<column> contains_impl(strings_column_view const& input,
                                      mr);
   if (input.is_empty()) { return results; }
 
-  auto d_prog = reprog_device::create(pattern, flags, capture_groups::NON_CAPTURE, stream);
+  auto d_prog = regex_device_builder::create_prog_device(prog, stream);
 
   auto d_results       = results->mutable_view().data<bool>();
   auto const d_strings = column_device_view::create(input.parent(), stream);
@@ -87,31 +87,28 @@ std::unique_ptr<column> contains_impl(strings_column_view const& input,
 }  // namespace
 
 std::unique_ptr<column> contains_re(strings_column_view const& input,
-                                    std::string_view pattern,
-                                    regex_flags const flags,
+                                    regex_program const& prog,
                                     rmm::cuda_stream_view stream,
                                     rmm::mr::device_memory_resource* mr)
 {
-  return contains_impl(input, pattern, flags, false, stream, mr);
+  return contains_impl(input, prog, false, stream, mr);
 }
 
 std::unique_ptr<column> matches_re(strings_column_view const& input,
-                                   std::string_view pattern,
-                                   regex_flags const flags,
+                                   regex_program const& prog,
                                    rmm::cuda_stream_view stream,
                                    rmm::mr::device_memory_resource* mr)
 {
-  return contains_impl(input, pattern, flags, true, stream, mr);
+  return contains_impl(input, prog, true, stream, mr);
 }
 
 std::unique_ptr<column> count_re(strings_column_view const& input,
-                                 std::string_view pattern,
-                                 regex_flags const flags,
+                                 regex_program const& prog,
                                  rmm::cuda_stream_view stream,
                                  rmm::mr::device_memory_resource* mr)
 {
-  // compile regex into device object
-  auto d_prog = reprog_device::create(pattern, flags, capture_groups::NON_CAPTURE, stream);
+  // create device object from regex_program
+  auto d_prog = regex_device_builder::create_prog_device(prog, stream);
 
   auto const d_strings = column_device_view::create(input.parent(), stream);
 
@@ -133,7 +130,16 @@ std::unique_ptr<column> contains_re(strings_column_view const& strings,
                                     rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::contains_re(strings, pattern, flags, cudf::get_default_stream(), mr);
+  auto const h_prog = regex_program::create(pattern, flags, capture_groups::NON_CAPTURE);
+  return detail::contains_re(strings, *h_prog, cudf::get_default_stream(), mr);
+}
+
+std::unique_ptr<column> contains_re(strings_column_view const& strings,
+                                    regex_program const& prog,
+                                    rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::contains_re(strings, prog, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> matches_re(strings_column_view const& strings,
@@ -142,7 +148,16 @@ std::unique_ptr<column> matches_re(strings_column_view const& strings,
                                    rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::matches_re(strings, pattern, flags, cudf::get_default_stream(), mr);
+  auto const h_prog = regex_program::create(pattern, flags, capture_groups::NON_CAPTURE);
+  return detail::matches_re(strings, *h_prog, cudf::get_default_stream(), mr);
+}
+
+std::unique_ptr<column> matches_re(strings_column_view const& strings,
+                                   regex_program const& prog,
+                                   rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::matches_re(strings, prog, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> count_re(strings_column_view const& strings,
@@ -151,7 +166,16 @@ std::unique_ptr<column> count_re(strings_column_view const& strings,
                                  rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::count_re(strings, pattern, flags, cudf::get_default_stream(), mr);
+  auto const h_prog = regex_program::create(pattern, flags, capture_groups::NON_CAPTURE);
+  return detail::count_re(strings, *h_prog, cudf::get_default_stream(), mr);
+}
+
+std::unique_ptr<column> count_re(strings_column_view const& strings,
+                                 regex_program const& prog,
+                                 rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::count_re(strings, prog, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/extract/extract.cu b/cpp/src/strings/extract/extract.cu
index 882b85d1066..f99b0e63715 100644
--- a/cpp/src/strings/extract/extract.cu
+++ b/cpp/src/strings/extract/extract.cu
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include <strings/regex/regex_program_impl.h>
 #include <strings/regex/utilities.cuh>
 
 #include <cudf/column/column.hpp>
@@ -86,13 +87,12 @@ struct extract_fn {
 
 //
 std::unique_ptr<table> extract(strings_column_view const& input,
-                               std::string_view pattern,
-                               regex_flags const flags,
+                               regex_program const& prog,
                                rmm::cuda_stream_view stream,
                                rmm::mr::device_memory_resource* mr)
 {
-  // compile regex into device object
-  auto d_prog = reprog_device::create(pattern, flags, capture_groups::EXTRACT, stream);
+  // create device object from regex_program
+  auto d_prog = regex_device_builder::create_prog_device(prog, stream);
 
   auto const groups = d_prog->group_counts();
   CUDF_EXPECTS(groups > 0, "Group indicators not found in regex pattern");
@@ -136,7 +136,16 @@ std::unique_ptr<table> extract(strings_column_view const& strings,
                                rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::extract(strings, pattern, flags, cudf::get_default_stream(), mr);
+  auto const h_prog = regex_program::create(pattern, flags, capture_groups::EXTRACT);
+  return detail::extract(strings, *h_prog, cudf::get_default_stream(), mr);
+}
+
+std::unique_ptr<table> extract(strings_column_view const& strings,
+                               regex_program const& prog,
+                               rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::extract(strings, prog, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/extract/extract_all.cu b/cpp/src/strings/extract/extract_all.cu
index e669d2178a2..c27834dae19 100644
--- a/cpp/src/strings/extract/extract_all.cu
+++ b/cpp/src/strings/extract/extract_all.cu
@@ -15,6 +15,7 @@
  */
 
 #include <strings/count_matches.hpp>
+#include <strings/regex/regex_program_impl.h>
 #include <strings/regex/utilities.cuh>
 
 #include <cudf/column/column_device_view.cuh>
@@ -96,16 +97,16 @@ struct extract_fn {
  * @param stream CUDA stream used for device memory operations and kernel launches.
  */
 std::unique_ptr<column> extract_all_record(strings_column_view const& input,
-                                           std::string_view pattern,
-                                           regex_flags const flags,
+                                           regex_program const& prog,
                                            rmm::cuda_stream_view stream,
                                            rmm::mr::device_memory_resource* mr)
 {
   auto const strings_count = input.size();
   auto const d_strings     = column_device_view::create(input.parent(), stream);
 
-  // Compile regex into device object.
-  auto d_prog = reprog_device::create(pattern, flags, capture_groups::EXTRACT, stream);
+  // create device object from regex_program
+  auto d_prog = regex_device_builder::create_prog_device(prog, stream);
+
   // The extract pattern should always include groups.
   auto const groups = d_prog->group_counts();
   CUDF_EXPECTS(groups > 0, "extract_all requires group indicators in the regex pattern.");
@@ -170,7 +171,16 @@ std::unique_ptr<column> extract_all_record(strings_column_view const& strings,
                                            rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::extract_all_record(strings, pattern, flags, cudf::get_default_stream(), mr);
+  auto const h_prog = regex_program::create(pattern, flags, capture_groups::EXTRACT);
+  return detail::extract_all_record(strings, *h_prog, cudf::get_default_stream(), mr);
+}
+
+std::unique_ptr<column> extract_all_record(strings_column_view const& strings,
+                                           regex_program const& prog,
+                                           rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::extract_all_record(strings, prog, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/regex/regcomp.cpp b/cpp/src/strings/regex/regcomp.cpp
index 5b86aedc409..0c0404f31ce 100644
--- a/cpp/src/strings/regex/regcomp.cpp
+++ b/cpp/src/strings/regex/regcomp.cpp
@@ -123,7 +123,7 @@ int32_t reprog::add_class(reclass const& cls)
 
 reinst& reprog::inst_at(int32_t id) { return _insts[id]; }
 
-reclass& reprog::class_at(int32_t id) { return _classes[id]; }
+reclass const& reprog::class_at(int32_t id) const { return _classes[id]; }
 
 void reprog::set_start_inst(int32_t id) { _startinst_id = id; }
 
diff --git a/cpp/src/strings/regex/regcomp.h b/cpp/src/strings/regex/regcomp.h
index 7ad7f481436..b450b3f90e7 100644
--- a/cpp/src/strings/regex/regcomp.h
+++ b/cpp/src/strings/regex/regcomp.h
@@ -128,7 +128,7 @@ class reprog {
   [[nodiscard]] reinst const* insts_data() const;
 
   [[nodiscard]] int32_t classes_count() const;
-  [[nodiscard]] reclass& class_at(int32_t id);
+  [[nodiscard]] reclass const& class_at(int32_t id) const;
   [[nodiscard]] reclass const* classes_data() const;
 
   [[nodiscard]] const int32_t* starts_data() const;
diff --git a/cpp/src/strings/regex/regex.cuh b/cpp/src/strings/regex/regex.cuh
index 98631680800..d16efb5f66e 100644
--- a/cpp/src/strings/regex/regex.cuh
+++ b/cpp/src/strings/regex/regex.cuh
@@ -25,6 +25,8 @@
 #include <thrust/optional.h>
 #include <thrust/pair.h>
 
+#include <cuda_runtime.h>
+
 #include <memory>
 
 namespace cudf {
@@ -56,6 +58,8 @@ struct alignas(16) reclass_device {
   __device__ inline bool is_match(char32_t const ch, uint8_t const* flags) const;
 };
 
+class reprog;
+
 /**
  * @brief Regex program of instructions/data for a specific regex pattern.
  *
@@ -78,32 +82,14 @@ class reprog_device {
   reprog_device& operator=(reprog_device&&) = default;
 
   /**
-   * @brief Create device program instance from a regex pattern.
-   *
-   * The number of strings is needed to compute the state data size required when evaluating the
-   * regex.
-   *
-   * @param pattern The regex pattern to compile.
-   * @param stream CUDA stream used for device memory operations and kernel launches.
-   * @return The program device object.
-   */
-  static std::unique_ptr<reprog_device, std::function<void(reprog_device*)>> create(
-    std::string_view pattern, rmm::cuda_stream_view stream);
-
-  /**
-   * @brief Create the device program instance from a regex pattern
+   * @brief Create device program instance from a regex program
    *
-   * @param pattern The regex pattern to compile
-   * @param re_flags Regex flags for interpreting special characters in the pattern
-   * @param capture Control how capture groups are processed
+   * @param prog The regex program to create from
    * @param stream CUDA stream used for device memory operations and kernel launches
    * @return The program device object
    */
   static std::unique_ptr<reprog_device, std::function<void(reprog_device*)>> create(
-    std::string_view pattern,
-    regex_flags const re_flags,
-    capture_groups const capture,
-    rmm::cuda_stream_view stream);
+    reprog const& prog, rmm::cuda_stream_view stream);
 
   /**
    * @brief Called automatically by the unique_ptr returned from create().
@@ -270,7 +256,7 @@ class reprog_device {
                                          cudf::size_type& end,
                                          cudf::size_type const group_id = 0) const;
 
-  reprog_device(reprog&);
+  reprog_device(reprog const&);
 
   int32_t _startinst_id;          // first instruction id
   int32_t _num_capturing_groups;  // instruction groups
@@ -289,6 +275,16 @@ class reprog_device {
   int32_t _thread_count{};   // threads available in working memory
 };
 
+/**
+ * @brief Return the size in bytes needed for working memory to
+ * execute insts_count instructions in parallel over num_threads threads.
+ *
+ * @param num_threads Number of parallel threads (usually one per string in a strings column)
+ * @param insts_count Number of instructions from a compiled regex pattern
+ * @return Number of bytes needed for working memory
+ */
+std::size_t compute_working_memory_size(int32_t num_threads, int32_t insts_count);
+
 }  // namespace detail
 }  // namespace strings
 }  // namespace cudf
diff --git a/cpp/src/strings/regex/regex_program.cpp b/cpp/src/strings/regex/regex_program.cpp
new file mode 100644
index 00000000000..c64da213fcf
--- /dev/null
+++ b/cpp/src/strings/regex/regex_program.cpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "regex_program_impl.h"
+
+#include <cudf/strings/regex/regex_program.hpp>
+
+#include <memory>
+#include <string>
+
+namespace cudf {
+namespace strings {
+
+std::unique_ptr<regex_program> regex_program::create(std::string_view pattern,
+                                                     regex_flags flags,
+                                                     capture_groups capture)
+{
+  // The constructor is private, so the instance is built with new and wrapped directly.
+  return std::unique_ptr<regex_program>(new regex_program(pattern, flags, capture));
+}
+
+regex_program::~regex_program()                     = default;
+regex_program::regex_program(regex_program&& other) = default;
+regex_program& regex_program::operator=(regex_program&& other) = default;
+
+regex_program::regex_program(std::string_view pattern, regex_flags flags, capture_groups capture)
+  : _pattern(pattern),
+    _flags(flags),
+    _capture(capture),  // kept so capture() returns the setting used at creation
+    _impl(
+      std::make_unique<regex_program_impl>(detail::reprog::create_from(pattern, flags, capture)))
+{}
+
+std::string regex_program::pattern() const { return _pattern; }
+
+regex_flags regex_program::flags() const { return _flags; }
+
+capture_groups regex_program::capture() const { return _capture; }
+
+int32_t regex_program::instructions_count() const { return _impl->prog.insts_count(); }
+
+int32_t regex_program::groups_count() const { return _impl->prog.groups_count(); }
+
+std::size_t regex_program::compute_working_memory_size(int32_t num_strings) const
+{
+  return detail::compute_working_memory_size(num_strings, instructions_count());
+}
+
+}  // namespace strings
+}  // namespace cudf
diff --git a/cpp/src/strings/regex/regex_program_impl.h b/cpp/src/strings/regex/regex_program_impl.h
new file mode 100644
index 00000000000..eede2225bce
--- /dev/null
+++ b/cpp/src/strings/regex/regex_program_impl.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "regcomp.h"
+#include "regex.cuh"
+
+#include <cudf/strings/regex/regex_program.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+
+namespace cudf {
+namespace strings {
+
+/**
+ * @brief Implementation object for regex_program
+ *
+ * It encapsulates internal reprog object used for building its device equivalent
+ */
+struct regex_program::regex_program_impl {
+  detail::reprog prog;
+
+  regex_program_impl(detail::reprog const& p) : prog(p) {}
+  regex_program_impl(detail::reprog&& p) : prog(p) {}
+
+  // TODO: There will be other options added here in the future to handle issues
+  // 10852 and possibly others like 11979
+};
+
+struct regex_device_builder {
+  static auto create_prog_device(regex_program const& p, rmm::cuda_stream_view stream)
+  {
+    return detail::reprog_device::create(p._impl->prog, stream);
+  }
+};
+
+}  // namespace strings
+}  // namespace cudf
diff --git a/cpp/src/strings/regex/regexec.cu b/cpp/src/strings/regex/regexec.cpp
similarity index 90%
rename from cpp/src/strings/regex/regexec.cu
rename to cpp/src/strings/regex/regexec.cpp
index 03247d24ba3..1c0a6869a2c 100644
--- a/cpp/src/strings/regex/regexec.cu
+++ b/cpp/src/strings/regex/regexec.cpp
@@ -33,7 +33,7 @@ namespace strings {
 namespace detail {
 
 // Copy reprog primitive values
-reprog_device::reprog_device(reprog& prog)
+reprog_device::reprog_device(reprog const& prog)
   : _startinst_id{prog.get_start_inst()},
     _num_capturing_groups{prog.groups_count()},
     _insts_count{prog.insts_count()},
@@ -45,22 +45,8 @@ reprog_device::reprog_device(reprog& prog)
 }
 
 std::unique_ptr<reprog_device, std::function<void(reprog_device*)>> reprog_device::create(
-  std::string_view pattern, rmm::cuda_stream_view stream)
+  reprog const& h_prog, rmm::cuda_stream_view stream)
 {
-  return reprog_device::create(
-    pattern, regex_flags::MULTILINE, capture_groups::NON_CAPTURE, stream);
-}
-
-// Create instance of the reprog that can be passed into a device kernel
-std::unique_ptr<reprog_device, std::function<void(reprog_device*)>> reprog_device::create(
-  std::string_view pattern,
-  regex_flags const flags,
-  capture_groups const capture,
-  rmm::cuda_stream_view stream)
-{
-  // compile pattern into host object
-  reprog h_prog = reprog::create_from(pattern, flags, capture);
-
   // compute size to hold all the member data
   auto const insts_count   = h_prog.insts_count();
   auto const classes_count = h_prog.classes_count();
@@ -144,7 +130,7 @@ void reprog_device::destroy() { delete this; }
 
 std::size_t reprog_device::working_memory_size(int32_t num_threads) const
 {
-  return relist::alloc_size(_insts_count, num_threads) * 2;
+  return compute_working_memory_size(num_threads, insts_counts());
 }
 
 std::pair<std::size_t, int32_t> reprog_device::compute_strided_working_memory(
@@ -176,6 +162,11 @@ int32_t reprog_device::compute_shared_memory_size() const
   return _prog_size < MAX_SHARED_MEM ? static_cast<int32_t>(_prog_size) : 0;
 }
 
+std::size_t compute_working_memory_size(int32_t num_threads, int32_t insts_count)
+{
+  return relist::alloc_size(insts_count, num_threads) * 2;
+}
+
 }  // namespace detail
 }  // namespace strings
 }  // namespace cudf
diff --git a/cpp/src/strings/replace/backref_re.cu b/cpp/src/strings/replace/backref_re.cu
index 9658610da18..383337c9088 100644
--- a/cpp/src/strings/replace/backref_re.cu
+++ b/cpp/src/strings/replace/backref_re.cu
@@ -16,6 +16,7 @@
 
 #include "backref_re.cuh"
 
+#include <strings/regex/regex_program_impl.h>
 #include <strings/regex/utilities.cuh>
 
 #include <cudf/column/column.hpp>
@@ -102,19 +103,18 @@ std::pair<std::string, std::vector<backref_type>> parse_backrefs(std::string_vie
 
 //
 std::unique_ptr<column> replace_with_backrefs(strings_column_view const& input,
-                                              std::string_view pattern,
+                                              regex_program const& prog,
                                               std::string_view replacement,
-                                              regex_flags const flags,
                                               rmm::cuda_stream_view stream,
                                               rmm::mr::device_memory_resource* mr)
 {
   if (input.is_empty()) return make_empty_column(type_id::STRING);
 
-  CUDF_EXPECTS(!pattern.empty(), "Parameter pattern must not be empty");
+  CUDF_EXPECTS(!prog.pattern().empty(), "Parameter pattern must not be empty");
   CUDF_EXPECTS(!replacement.empty(), "Parameter replacement must not be empty");
 
-  // compile regex into device object
-  auto d_prog = reprog_device::create(pattern, flags, capture_groups::EXTRACT, stream);
+  // create device object from regex_program
+  auto d_prog = regex_device_builder::create_prog_device(prog, stream);
 
   // parse the repl string for back-ref indicators
   auto group_count = std::min(99, d_prog->group_counts());  // group count should NOT exceed 99
@@ -152,8 +152,18 @@ std::unique_ptr<column> replace_with_backrefs(strings_column_view const& strings
                                               rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
+  auto const h_prog = regex_program::create(pattern, flags, capture_groups::EXTRACT);
   return detail::replace_with_backrefs(
-    strings, pattern, replacement, flags, cudf::get_default_stream(), mr);
+    strings, *h_prog, replacement, cudf::get_default_stream(), mr);
+}
+
+std::unique_ptr<column> replace_with_backrefs(strings_column_view const& strings,
+                                              regex_program const& prog,
+                                              std::string_view replacement,
+                                              rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::replace_with_backrefs(strings, prog, replacement, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/replace/multi_re.cu b/cpp/src/strings/replace/multi_re.cu
index f15496ac159..fcc24f36b5c 100644
--- a/cpp/src/strings/replace/multi_re.cu
+++ b/cpp/src/strings/replace/multi_re.cu
@@ -15,6 +15,7 @@
  */
 
 #include <strings/regex/regex.cuh>
+#include <strings/regex/regex_program_impl.h>
 
 #include <cudf/column/column.hpp>
 #include <cudf/column/column_device_view.cuh>
@@ -144,7 +145,8 @@ std::unique_ptr<column> replace_re(strings_column_view const& input,
     patterns.size());
   std::transform(
     patterns.begin(), patterns.end(), h_progs.begin(), [flags, stream](auto const& ptn) {
-      return reprog_device::create(ptn, flags, capture_groups::NON_CAPTURE, stream);
+      auto h_prog = regex_program::create(ptn, flags, capture_groups::NON_CAPTURE);
+      return regex_device_builder::create_prog_device(*h_prog, stream);
     });
 
   // get the longest regex for the dispatcher
diff --git a/cpp/src/strings/replace/replace_re.cu b/cpp/src/strings/replace/replace_re.cu
index e9cc60f1d64..0e2f3169e8e 100644
--- a/cpp/src/strings/replace/replace_re.cu
+++ b/cpp/src/strings/replace/replace_re.cu
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include <strings/regex/regex_program_impl.h>
 #include <strings/regex/utilities.cuh>
 
 #include <cudf/column/column.hpp>
@@ -101,10 +102,9 @@ struct replace_regex_fn {
 
 //
 std::unique_ptr<column> replace_re(strings_column_view const& input,
-                                   std::string_view pattern,
+                                   regex_program const& prog,
                                    string_scalar const& replacement,
                                    std::optional<size_type> max_replace_count,
-                                   regex_flags const flags,
                                    rmm::cuda_stream_view stream,
                                    rmm::mr::device_memory_resource* mr)
 {
@@ -113,8 +113,8 @@ std::unique_ptr<column> replace_re(strings_column_view const& input,
   CUDF_EXPECTS(replacement.is_valid(stream), "Parameter replacement must be valid");
   string_view d_repl(replacement.data(), replacement.size());
 
-  // compile regex into device object
-  auto d_prog = reprog_device::create(pattern, flags, capture_groups::NON_CAPTURE, stream);
+  // create device object from regex_program
+  auto d_prog = regex_device_builder::create_prog_device(prog, stream);
 
   auto const maxrepl = max_replace_count.value_or(-1);
 
@@ -140,10 +140,22 @@ std::unique_ptr<column> replace_re(strings_column_view const& strings,
                                    std::optional<size_type> max_replace_count,
                                    regex_flags const flags,
                                    rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  auto const h_prog = regex_program::create(pattern, flags, capture_groups::NON_CAPTURE);
+  return detail::replace_re(
+    strings, *h_prog, replacement, max_replace_count, cudf::get_default_stream(), mr);
+}
+
+std::unique_ptr<column> replace_re(strings_column_view const& strings,
+                                   regex_program const& prog,
+                                   string_scalar const& replacement,
+                                   std::optional<size_type> max_replace_count,
+                                   rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
   return detail::replace_re(
-    strings, pattern, replacement, max_replace_count, flags, cudf::get_default_stream(), mr);
+    strings, prog, replacement, max_replace_count, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/search/findall.cu b/cpp/src/strings/search/findall.cu
index b5b8cab65a7..6ab1b3e726b 100644
--- a/cpp/src/strings/search/findall.cu
+++ b/cpp/src/strings/search/findall.cu
@@ -15,6 +15,7 @@
  */
 
 #include <strings/count_matches.hpp>
+#include <strings/regex/regex_program_impl.h>
 #include <strings/regex/utilities.cuh>
 
 #include <cudf/column/column.hpp>
@@ -93,16 +94,15 @@ std::unique_ptr<column> findall_util(column_device_view const& d_strings,
 
 //
 std::unique_ptr<column> findall(strings_column_view const& input,
-                                std::string_view pattern,
-                                regex_flags const flags,
+                                regex_program const& prog,
                                 rmm::cuda_stream_view stream,
                                 rmm::mr::device_memory_resource* mr)
 {
   auto const strings_count = input.size();
   auto const d_strings     = column_device_view::create(input.parent(), stream);
 
-  // compile regex into device object
-  auto const d_prog = reprog_device::create(pattern, flags, capture_groups::NON_CAPTURE, stream);
+  // create device object from regex_program
+  auto d_prog = regex_device_builder::create_prog_device(prog, stream);
 
   // Create lists offsets column
   auto offsets   = count_matches(*d_strings, *d_prog, strings_count + 1, stream, mr);
@@ -138,7 +138,16 @@ std::unique_ptr<column> findall(strings_column_view const& input,
                                 rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::findall(input, pattern, flags, cudf::get_default_stream(), mr);
+  auto const h_prog = regex_program::create(pattern, flags, capture_groups::NON_CAPTURE);
+  return detail::findall(input, *h_prog, cudf::get_default_stream(), mr);
+}
+
+std::unique_ptr<column> findall(strings_column_view const& input,
+                                regex_program const& prog,
+                                rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::findall(input, prog, cudf::get_default_stream(), mr);
 }
 
 }  // namespace strings
diff --git a/cpp/src/strings/split/split_re.cu b/cpp/src/strings/split/split_re.cu
index a17c0a575fb..fdd46300820 100644
--- a/cpp/src/strings/split/split_re.cu
+++ b/cpp/src/strings/split/split_re.cu
@@ -15,6 +15,7 @@
  */
 
 #include <strings/count_matches.hpp>
+#include <strings/regex/regex_program_impl.h>
 #include <strings/regex/utilities.cuh>
 
 #include <cudf/column/column.hpp>
@@ -184,13 +185,13 @@ struct tokens_transform_fn {
 };
 
 std::unique_ptr<table> split_re(strings_column_view const& input,
-                                std::string_view pattern,
+                                regex_program const& prog,
                                 split_direction direction,
                                 size_type maxsplit,
                                 rmm::cuda_stream_view stream,
                                 rmm::mr::device_memory_resource* mr)
 {
-  CUDF_EXPECTS(!pattern.empty(), "Parameter pattern must not be empty");
+  CUDF_EXPECTS(!prog.pattern().empty(), "Parameter pattern must not be empty");
 
   auto const strings_count = input.size();
 
@@ -200,8 +201,9 @@ std::unique_ptr<table> split_re(strings_column_view const& input,
     return std::make_unique<table>(std::move(results));
   }
 
-  // create the regex device prog from the given pattern
-  auto d_prog    = reprog_device::create(pattern, stream);
+  // create device object from regex_program
+  auto d_prog = regex_device_builder::create_prog_device(prog, stream);
+
   auto d_strings = column_device_view::create(input.parent(), stream);
 
   // count the number of delimiters matched in each string
@@ -253,18 +255,19 @@ std::unique_ptr<table> split_re(strings_column_view const& input,
 }
 
 std::unique_ptr<column> split_record_re(strings_column_view const& input,
-                                        std::string_view pattern,
+                                        regex_program const& prog,
                                         split_direction direction,
                                         size_type maxsplit,
                                         rmm::cuda_stream_view stream,
                                         rmm::mr::device_memory_resource* mr)
 {
-  CUDF_EXPECTS(!pattern.empty(), "Parameter pattern must not be empty");
+  CUDF_EXPECTS(!prog.pattern().empty(), "Parameter pattern must not be empty");
 
   auto const strings_count = input.size();
 
-  // create the regex device prog from the given pattern
-  auto d_prog    = reprog_device::create(pattern, stream);
+  // create device object from regex_program
+  auto d_prog = regex_device_builder::create_prog_device(prog, stream);
+
   auto d_strings = column_device_view::create(input.parent(), stream);
 
   // count the number of delimiters matched in each string
@@ -290,39 +293,39 @@ std::unique_ptr<column> split_record_re(strings_column_view const& input,
 }  // namespace
 
 std::unique_ptr<table> split_re(strings_column_view const& input,
-                                std::string_view pattern,
+                                regex_program const& prog,
                                 size_type maxsplit,
                                 rmm::cuda_stream_view stream,
                                 rmm::mr::device_memory_resource* mr)
 {
-  return split_re(input, pattern, split_direction::FORWARD, maxsplit, stream, mr);
+  return split_re(input, prog, split_direction::FORWARD, maxsplit, stream, mr);
 }
 
 std::unique_ptr<column> split_record_re(strings_column_view const& input,
-                                        std::string_view pattern,
+                                        regex_program const& prog,
                                         size_type maxsplit,
                                         rmm::cuda_stream_view stream,
                                         rmm::mr::device_memory_resource* mr)
 {
-  return split_record_re(input, pattern, split_direction::FORWARD, maxsplit, stream, mr);
+  return split_record_re(input, prog, split_direction::FORWARD, maxsplit, stream, mr);
 }
 
 std::unique_ptr<table> rsplit_re(strings_column_view const& input,
-                                 std::string_view pattern,
+                                 regex_program const& prog,
                                  size_type maxsplit,
                                  rmm::cuda_stream_view stream,
                                  rmm::mr::device_memory_resource* mr)
 {
-  return split_re(input, pattern, split_direction::BACKWARD, maxsplit, stream, mr);
+  return split_re(input, prog, split_direction::BACKWARD, maxsplit, stream, mr);
 }
 
 std::unique_ptr<column> rsplit_record_re(strings_column_view const& input,
-                                         std::string_view pattern,
+                                         regex_program const& prog,
                                          size_type maxsplit,
                                          rmm::cuda_stream_view stream,
                                          rmm::mr::device_memory_resource* mr)
 {
-  return split_record_re(input, pattern, split_direction::BACKWARD, maxsplit, stream, mr);
+  return split_record_re(input, prog, split_direction::BACKWARD, maxsplit, stream, mr);
 }
 
 }  // namespace detail
@@ -335,7 +338,17 @@ std::unique_ptr<table> split_re(strings_column_view const& input,
                                 rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::split_re(input, pattern, maxsplit, cudf::get_default_stream(), mr);
+  auto const h_prog = regex_program::create(pattern);
+  return detail::split_re(input, *h_prog, maxsplit, cudf::get_default_stream(), mr);
+}
+
+std::unique_ptr<table> split_re(strings_column_view const& input,
+                                regex_program const& prog,
+                                size_type maxsplit,
+                                rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::split_re(input, prog, maxsplit, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> split_record_re(strings_column_view const& input,
@@ -344,7 +357,17 @@ std::unique_ptr<column> split_record_re(strings_column_view const& input,
                                         rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::split_record_re(input, pattern, maxsplit, cudf::get_default_stream(), mr);
+  auto const h_prog = regex_program::create(pattern);
+  return detail::split_record_re(input, *h_prog, maxsplit, cudf::get_default_stream(), mr);
+}
+
+std::unique_ptr<column> split_record_re(strings_column_view const& input,
+                                        regex_program const& prog,
+                                        size_type maxsplit,
+                                        rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::split_record_re(input, prog, maxsplit, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<table> rsplit_re(strings_column_view const& input,
@@ -353,7 +376,17 @@ std::unique_ptr<table> rsplit_re(strings_column_view const& input,
                                  rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::rsplit_re(input, pattern, maxsplit, cudf::get_default_stream(), mr);
+  auto const h_prog = regex_program::create(pattern);
+  return detail::rsplit_re(input, *h_prog, maxsplit, cudf::get_default_stream(), mr);
+}
+
+std::unique_ptr<table> rsplit_re(strings_column_view const& input,
+                                 regex_program const& prog,
+                                 size_type maxsplit,
+                                 rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::rsplit_re(input, prog, maxsplit, cudf::get_default_stream(), mr);
 }
 
 std::unique_ptr<column> rsplit_record_re(strings_column_view const& input,
@@ -362,7 +395,18 @@ std::unique_ptr<column> rsplit_record_re(strings_column_view const& input,
                                          rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  return detail::rsplit_record_re(input, pattern, maxsplit, cudf::get_default_stream(), mr);
+  auto const h_prog = regex_program::create(pattern);
+  return detail::rsplit_record_re(input, *h_prog, maxsplit, cudf::get_default_stream(), mr);
 }
+
+std::unique_ptr<column> rsplit_record_re(strings_column_view const& input,
+                                         regex_program const& prog,
+                                         size_type maxsplit,
+                                         rmm::mr::device_memory_resource* mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::rsplit_record_re(input, prog, maxsplit, cudf::get_default_stream(), mr);
+}
+
 }  // namespace strings
 }  // namespace cudf
diff --git a/cpp/tests/strings/contains_tests.cpp b/cpp/tests/strings/contains_tests.cpp
index aaacc08d5fb..43ef73baf14 100644
--- a/cpp/tests/strings/contains_tests.cpp
+++ b/cpp/tests/strings/contains_tests.cpp
@@ -14,13 +14,15 @@
  * limitations under the License.
  */
 
-#include <cudf/detail/utilities/vector_factories.hpp>
-#include <cudf/strings/contains.hpp>
-#include <cudf/strings/strings_column_view.hpp>
-
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/iterator_utilities.hpp>
+
+#include <cudf/detail/utilities/vector_factories.hpp>
+#include <cudf/strings/contains.hpp>
+#include <cudf/strings/regex/regex_program.hpp>
+#include <cudf/strings/strings_column_view.hpp>
 
 #include <thrust/host_vector.h>
 #include <thrust/iterator/counting_iterator.h>
@@ -147,6 +149,9 @@ TEST_F(StringsContainsTests, ContainsTest)
       h_expected + h_strings.size(),
       thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; }));
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+    auto prog = cudf::strings::regex_program::create(ptn);
+    results   = cudf::strings::contains_re(strings_view, *prog);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
   }
 }
 
@@ -161,40 +166,56 @@ TEST_F(StringsContainsTests, MatchesTest)
 
   auto strings_view = cudf::strings_column_view(strings);
   {
-    auto results      = cudf::strings::matches_re(strings_view, "lazy");
-    bool h_expected[] = {false, false, true, false, false, false, false};
+    auto const pattern = std::string("lazy");
+    auto results       = cudf::strings::matches_re(strings_view, pattern);
+    bool h_expected[]  = {false, false, true, false, false, false, false};
     cudf::test::fixed_width_column_wrapper<bool> expected(
       h_expected,
       h_expected + h_strings.size(),
       thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; }));
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+    auto prog = cudf::strings::regex_program::create(pattern);
+    results   = cudf::strings::matches_re(strings_view, *prog);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
   }
   {
-    auto results      = cudf::strings::matches_re(strings_view, "\\d+");
-    bool h_expected[] = {false, false, false, true, true, false, false};
+    auto const pattern = std::string("\\d+");
+    auto results       = cudf::strings::matches_re(strings_view, pattern);
+    bool h_expected[]  = {false, false, false, true, true, false, false};
     cudf::test::fixed_width_column_wrapper<bool> expected(
       h_expected,
       h_expected + h_strings.size(),
       thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; }));
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+    auto prog = cudf::strings::regex_program::create(pattern);
+    results   = cudf::strings::matches_re(strings_view, *prog);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
   }
   {
-    auto results      = cudf::strings::matches_re(strings_view, "@\\w+");
-    bool h_expected[] = {false, false, false, false, false, false, false};
+    auto const pattern = std::string("@\\w+");
+    auto results       = cudf::strings::matches_re(strings_view, pattern);
+    bool h_expected[]  = {false, false, false, false, false, false, false};
     cudf::test::fixed_width_column_wrapper<bool> expected(
       h_expected,
       h_expected + h_strings.size(),
       thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; }));
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+    auto prog = cudf::strings::regex_program::create(pattern);
+    results   = cudf::strings::matches_re(strings_view, *prog);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
   }
   {
-    auto results      = cudf::strings::matches_re(strings_view, ".*");
-    bool h_expected[] = {true, true, true, true, true, false, true};
+    auto const pattern = std::string(".*");
+    auto results       = cudf::strings::matches_re(strings_view, pattern);
+    bool h_expected[]  = {true, true, true, true, true, false, true};
     cudf::test::fixed_width_column_wrapper<bool> expected(
       h_expected,
       h_expected + h_strings.size(),
       thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; }));
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+    auto prog = cudf::strings::regex_program::create(pattern);
+    results   = cudf::strings::matches_re(strings_view, *prog);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
   }
 }
 
@@ -219,6 +240,9 @@ TEST_F(StringsContainsTests, MatchesIPV4Test)
     cudf::test::fixed_width_column_wrapper<bool> expected(
       {true, true, false, false, false, false, true, true, true, true});
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected);
+    auto prog = cudf::strings::regex_program::create(pattern);
+    results   = cudf::strings::matches_re(strings_view, *prog);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected);
   }
   {  // is_loopback: 72 instructions
     std::string pattern =
@@ -229,6 +253,9 @@ TEST_F(StringsContainsTests, MatchesIPV4Test)
     cudf::test::fixed_width_column_wrapper<bool> expected(
       {false, false, false, false, false, false, false, false, false, true});
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected);
+    auto prog = cudf::strings::regex_program::create(pattern);
+    results   = cudf::strings::matches_re(strings_view, *prog);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected);
   }
   {  // is_multicast: 79 instructions
     std::string pattern =
@@ -239,6 +266,9 @@ TEST_F(StringsContainsTests, MatchesIPV4Test)
     cudf::test::fixed_width_column_wrapper<bool> expected(
       {false, false, false, false, false, false, true, true, false, false});
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected);
+    auto prog = cudf::strings::regex_program::create(pattern);
+    results   = cudf::strings::matches_re(strings_view, *prog);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected);
   }
 }
 
@@ -247,18 +277,43 @@ TEST_F(StringsContainsTests, OctalTest)
   cudf::test::strings_column_wrapper strings({"A3", "B", "CDA3EY", "", "99", "\a\t\r"});
   auto strings_view = cudf::strings_column_view(strings);
   auto expected     = cudf::test::fixed_width_column_wrapper<bool>({1, 0, 1, 0, 0, 0});
-  auto results      = cudf::strings::contains_re(strings_view, "\\101");
+
+  auto pattern = std::string("\\101");
+  auto results = cudf::strings::contains_re(strings_view, pattern);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+  auto prog = cudf::strings::regex_program::create(pattern);
+  results   = cudf::strings::contains_re(strings_view, *prog);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
-  results = cudf::strings::contains_re(strings_view, "\\1013");
+
+  pattern = std::string("\\1013");
+  results = cudf::strings::contains_re(strings_view, pattern);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+  prog    = cudf::strings::regex_program::create(pattern);
+  results = cudf::strings::contains_re(strings_view, *prog);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+
+  pattern = std::string("D*\\101\\063");
+  results = cudf::strings::contains_re(strings_view, pattern);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
-  results = cudf::strings::contains_re(strings_view, "D*\\101\\063");
+  prog    = cudf::strings::regex_program::create(pattern);
+  results = cudf::strings::contains_re(strings_view, *prog);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
-  results  = cudf::strings::contains_re(strings_view, "\\719");
+
+  pattern  = std::string("\\719");
+  results  = cudf::strings::contains_re(strings_view, pattern);
   expected = cudf::test::fixed_width_column_wrapper<bool>({0, 0, 0, 0, 1, 0});
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
-  results  = cudf::strings::contains_re(strings_view, "[\\7][\\11][\\15]");
+  prog    = cudf::strings::regex_program::create(pattern);
+  results = cudf::strings::contains_re(strings_view, *prog);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+
+  pattern  = std::string("[\\7][\\11][\\15]");
+  results  = cudf::strings::contains_re(strings_view, pattern);
   expected = cudf::test::fixed_width_column_wrapper<bool>({0, 0, 0, 0, 0, 1});
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+  prog    = cudf::strings::regex_program::create(pattern);
+  results = cudf::strings::contains_re(strings_view, *prog);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
 }
 
 TEST_F(StringsContainsTests, HexTest)
@@ -285,10 +340,17 @@ TEST_F(StringsContainsTests, HexTest)
       0, [ch](auto idx) { return ch == static_cast<char>(idx); });
     cudf::test::fixed_width_column_wrapper<bool> expected(true_dat, true_dat + count);
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+    auto prog = cudf::strings::regex_program::create(pattern);
+    results   = cudf::strings::contains_re(strings_view, *prog);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+
     // also test hex character appearing in character class brackets
     pattern = "[" + pattern + "]";
     results = cudf::strings::contains_re(strings_view, pattern);
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+    prog    = cudf::strings::regex_program::create(pattern);
+    results = cudf::strings::contains_re(strings_view, *prog);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
   }
 }
 
@@ -303,36 +365,56 @@ TEST_F(StringsContainsTests, EmbeddedNullCharacter)
   cudf::test::strings_column_wrapper input(data.begin(), data.end());
   auto strings_view = cudf::strings_column_view(input);
 
-  auto results  = cudf::strings::contains_re(strings_view, "A");
+  auto pattern  = std::string("A");
+  auto results  = cudf::strings::contains_re(strings_view, pattern);
   auto expected = cudf::test::fixed_width_column_wrapper<bool>({1, 0, 0, 0, 0, 0, 0, 0, 0, 0});
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+  auto prog = cudf::strings::regex_program::create(pattern);
+  results   = cudf::strings::contains_re(strings_view, *prog);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
 
-  results  = cudf::strings::contains_re(strings_view, "B");
+  pattern  = std::string("B");
+  results  = cudf::strings::contains_re(strings_view, pattern);
   expected = cudf::test::fixed_width_column_wrapper<bool>({1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+  prog    = cudf::strings::regex_program::create(pattern);
+  results = cudf::strings::contains_re(strings_view, *prog);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
 
-  results  = cudf::strings::contains_re(strings_view, "J\\0B");
+  pattern  = std::string("J\\0B");
+  results  = cudf::strings::contains_re(strings_view, pattern);
   expected = cudf::test::fixed_width_column_wrapper<bool>({0, 0, 0, 0, 0, 0, 0, 0, 0, 1});
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+  prog    = cudf::strings::regex_program::create(pattern);
+  results = cudf::strings::contains_re(strings_view, *prog);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
 
-  results  = cudf::strings::contains_re(strings_view, "[G-J][\\0]B");
+  pattern  = std::string("[G-J][\\0]B");
+  results  = cudf::strings::contains_re(strings_view, pattern);
   expected = cudf::test::fixed_width_column_wrapper<bool>({0, 0, 0, 0, 0, 0, 1, 1, 1, 1});
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+  prog    = cudf::strings::regex_program::create(pattern);
+  results = cudf::strings::contains_re(strings_view, *prog);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
 
-  results  = cudf::strings::contains_re(strings_view, "[A-D][\\x00]B");
+  pattern  = std::string("[A-D][\\x00]B");
+  results  = cudf::strings::contains_re(strings_view, pattern);
   expected = cudf::test::fixed_width_column_wrapper<bool>({1, 1, 1, 1, 0, 0, 0, 0, 0, 0});
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+  prog    = cudf::strings::regex_program::create(pattern);
+  results = cudf::strings::contains_re(strings_view, *prog);
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
 }
 
 TEST_F(StringsContainsTests, Errors)
 {
-  cudf::test::strings_column_wrapper input({"3", "33"});
-  auto strings_view = cudf::strings_column_view(input);
+  EXPECT_THROW(cudf::strings::regex_program::create("(3?)+"), cudf::logic_error);
+  EXPECT_THROW(cudf::strings::regex_program::create("(?:3?)+"), cudf::logic_error);
+  EXPECT_THROW(cudf::strings::regex_program::create("3?+"), cudf::logic_error);
+  EXPECT_THROW(cudf::strings::regex_program::create("{3}a"), cudf::logic_error);
 
-  EXPECT_THROW(cudf::strings::contains_re(strings_view, "(3?)+"), cudf::logic_error);
-  EXPECT_THROW(cudf::strings::contains_re(strings_view, "(?:3?)+"), cudf::logic_error);
-  EXPECT_THROW(cudf::strings::contains_re(strings_view, "3?+"), cudf::logic_error);
-  EXPECT_THROW(cudf::strings::count_re(strings_view, "{3}a"), cudf::logic_error);
+  EXPECT_THROW(cudf::strings::regex_program::create("aaaa{1234,5678}"), cudf::logic_error);
+  EXPECT_THROW(cudf::strings::regex_program::create("aaaa{123,5678}"), cudf::logic_error);
 }
 
 TEST_F(StringsContainsTests, CountTest)
@@ -340,36 +422,37 @@ TEST_F(StringsContainsTests, CountTest)
   std::vector<const char*> h_strings{
     "The quick brown @fox jumps ovér the", "lazy @dog", "1:2:3:4", "00:0:00", nullptr, ""};
   cudf::test::strings_column_wrapper strings(
-    h_strings.begin(),
-    h_strings.end(),
-    thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; }));
+    h_strings.begin(), h_strings.end(), cudf::test::iterators::nulls_from_nullptrs(h_strings));
 
   auto strings_view = cudf::strings_column_view(strings);
   {
-    auto results         = cudf::strings::count_re(strings_view, "[tT]he");
-    int32_t h_expected[] = {2, 0, 0, 0, 0, 0};
+    auto pattern = std::string("[tT]he");
+    auto results = cudf::strings::count_re(strings_view, pattern);
     cudf::test::fixed_width_column_wrapper<int32_t> expected(
-      h_expected,
-      h_expected + h_strings.size(),
-      thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; }));
+      {2, 0, 0, 0, 0, 0}, cudf::test::iterators::nulls_from_nullptrs(h_strings));
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+    auto prog = cudf::strings::regex_program::create(pattern);
+    results   = cudf::strings::count_re(strings_view, *prog);
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
   }
   {
-    auto results         = cudf::strings::count_re(strings_view, "@\\w+");
-    int32_t h_expected[] = {1, 1, 0, 0, 0, 0};
+    auto pattern = std::string("@\\w+");
+    auto results = cudf::strings::count_re(strings_view, pattern);
     cudf::test::fixed_width_column_wrapper<int32_t> expected(
-      h_expected,
-      h_expected + h_strings.size(),
-      thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; }));
+      {1, 1, 0, 0, 0, 0}, cudf::test::iterators::nulls_from_nullptrs(h_strings));
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+    auto prog = cudf::strings::regex_program::create(pattern);
+    results   = cudf::strings::count_re(strings_view, *prog);
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
   }
   {
-    auto results         = cudf::strings::count_re(strings_view, "\\d+:\\d+");
-    int32_t h_expected[] = {0, 0, 2, 1, 0, 0};
+    auto pattern = std::string("\\d+:\\d+");
+    auto results = cudf::strings::count_re(strings_view, pattern);
     cudf::test::fixed_width_column_wrapper<int32_t> expected(
-      h_expected,
-      h_expected + h_strings.size(),
-      thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; }));
+      {0, 0, 2, 1, 0, 0}, cudf::test::iterators::nulls_from_nullptrs(h_strings));
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+    auto prog = cudf::strings::regex_program::create(pattern);
+    results   = cudf::strings::count_re(strings_view, *prog);
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
   }
 }
@@ -381,67 +464,90 @@ TEST_F(StringsContainsTests, FixedQuantifier)
 
   {
     // exact match
-    auto results = cudf::strings::count_re(sv, "a{3}");
+    auto pattern = std::string("a{3}");
+    auto results = cudf::strings::count_re(sv, pattern);
     cudf::test::fixed_width_column_wrapper<int32_t> expected({0, 0, 1, 1, 1, 2});
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+    auto prog = cudf::strings::regex_program::create(pattern);
+    results   = cudf::strings::count_re(sv, *prog);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
   }
   {
     // range match (greedy quantifier)
-    auto results = cudf::strings::count_re(sv, "a{3,5}");
+    auto pattern = std::string("a{3,5}");
+    auto results = cudf::strings::count_re(sv, pattern);
     cudf::test::fixed_width_column_wrapper<int32_t> expected({0, 0, 1, 1, 1, 1});
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+    auto prog = cudf::strings::regex_program::create(pattern);
+    results   = cudf::strings::count_re(sv, *prog);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
   }
   {
     // minimum match (greedy quantifier)
-    auto results = cudf::strings::count_re(sv, "a{2,}");
+    auto pattern = std::string("a{2,}");
+    auto results = cudf::strings::count_re(sv, pattern);
     cudf::test::fixed_width_column_wrapper<int32_t> expected({0, 1, 1, 1, 1, 1});
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+    auto prog = cudf::strings::regex_program::create(pattern);
+    results   = cudf::strings::count_re(sv, *prog);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
   }
   {
     // range match (lazy quantifier)
-    auto results = cudf::strings::count_re(sv, "a{2,4}?");
+    auto pattern = std::string("a{2,4}?");
+    auto results = cudf::strings::count_re(sv, pattern);
     cudf::test::fixed_width_column_wrapper<int32_t> expected({0, 1, 1, 2, 2, 3});
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+    auto prog = cudf::strings::regex_program::create(pattern);
+    results   = cudf::strings::count_re(sv, *prog);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
   }
   {
     // minimum match (lazy quantifier)
-    auto results = cudf::strings::count_re(sv, "a{1,}?");
+    auto pattern = std::string("a{1,}?");
+    auto results = cudf::strings::count_re(sv, pattern);
     cudf::test::fixed_width_column_wrapper<int32_t> expected({1, 2, 3, 4, 5, 6});
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+    auto prog = cudf::strings::regex_program::create(pattern);
+    results   = cudf::strings::count_re(sv, *prog);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
   }
   {
     // zero match
-    auto results = cudf::strings::count_re(sv, "aaaa{0}");
+    auto pattern = std::string("aaaa{0}");
+    auto results = cudf::strings::count_re(sv, pattern);
     cudf::test::fixed_width_column_wrapper<int32_t> expected({0, 0, 1, 1, 1, 2});
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+    auto prog = cudf::strings::regex_program::create(pattern);
+    results   = cudf::strings::count_re(sv, *prog);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
   }
   {
     // poorly formed
-    auto results = cudf::strings::count_re(sv, "aaaa{n,m}");
+    auto pattern = std::string("aaaa{n,m}");
+    auto results = cudf::strings::count_re(sv, pattern);
     cudf::test::fixed_width_column_wrapper<int32_t> expected({0, 0, 0, 0, 0, 0});
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
-    EXPECT_THROW(cudf::strings::count_re(sv, "aaaa{1234,5678}"), cudf::logic_error);
-    EXPECT_THROW(cudf::strings::count_re(sv, "aaaa{123,5678}"), cudf::logic_error);
+    auto prog = cudf::strings::regex_program::create(pattern);
+    results   = cudf::strings::count_re(sv, *prog);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
   }
 }
 
 TEST_F(StringsContainsTests, QuantifierErrors)
 {
-  auto input = cudf::test::strings_column_wrapper({"a", "aa", "aaa", "aaaa", "aaaaa", "aaaaaa"});
-  auto sv    = cudf::strings_column_view(input);
-
-  EXPECT_THROW(cudf::strings::contains_re(sv, "^+"), cudf::logic_error);
-  EXPECT_THROW(cudf::strings::count_re(sv, "$+"), cudf::logic_error);
-  EXPECT_THROW(cudf::strings::count_re(sv, "(^)+"), cudf::logic_error);
-  EXPECT_THROW(cudf::strings::contains_re(sv, "($)+"), cudf::logic_error);
-  EXPECT_THROW(cudf::strings::count_re(sv, "\\A+"), cudf::logic_error);
-  EXPECT_THROW(cudf::strings::count_re(sv, "\\Z+"), cudf::logic_error);
-  EXPECT_THROW(cudf::strings::contains_re(sv, "(\\A)+"), cudf::logic_error);
-  EXPECT_THROW(cudf::strings::contains_re(sv, "(\\Z)+"), cudf::logic_error);
-
-  EXPECT_THROW(cudf::strings::contains_re(sv, "(^($))+"), cudf::logic_error);
-  EXPECT_NO_THROW(cudf::strings::contains_re(sv, "(^a($))+"));
-  EXPECT_NO_THROW(cudf::strings::count_re(sv, "(^(a$))+"));
+  EXPECT_THROW(cudf::strings::regex_program::create("^+"), cudf::logic_error);
+  EXPECT_THROW(cudf::strings::regex_program::create("$+"), cudf::logic_error);
+  EXPECT_THROW(cudf::strings::regex_program::create("(^)+"), cudf::logic_error);
+  EXPECT_THROW(cudf::strings::regex_program::create("($)+"), cudf::logic_error);
+  EXPECT_THROW(cudf::strings::regex_program::create("\\A+"), cudf::logic_error);
+  EXPECT_THROW(cudf::strings::regex_program::create("\\Z+"), cudf::logic_error);
+  EXPECT_THROW(cudf::strings::regex_program::create("(\\A)+"), cudf::logic_error);
+  EXPECT_THROW(cudf::strings::regex_program::create("(\\Z)+"), cudf::logic_error);
+
+  EXPECT_THROW(cudf::strings::regex_program::create("(^($))+"), cudf::logic_error);
+  EXPECT_NO_THROW(cudf::strings::regex_program::create("(^a($))+"));
+  EXPECT_NO_THROW(cudf::strings::regex_program::create("(^(a$))+"));
 }
 
 TEST_F(StringsContainsTests, OverlappedClasses)
@@ -450,14 +556,22 @@ TEST_F(StringsContainsTests, OverlappedClasses)
   auto sv = cudf::strings_column_view(input);
 
   {
-    auto results = cudf::strings::count_re(sv, "[e-gb-da-c]");
+    auto pattern = std::string("[e-gb-da-c]");
+    auto results = cudf::strings::count_re(sv, pattern);
     cudf::test::fixed_width_column_wrapper<int32_t> expected({7, 4, 0, 0, 1});
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+    auto prog = cudf::strings::regex_program::create(pattern);
+    results   = cudf::strings::count_re(sv, *prog);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
   }
   {
-    auto results = cudf::strings::count_re(sv, "[á-éê-ú]");
+    auto pattern = std::string("[á-éê-ú]");
+    auto results = cudf::strings::count_re(sv, pattern);
     cudf::test::fixed_width_column_wrapper<int32_t> expected({0, 1, 0, 6, 0});
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+    auto prog = cudf::strings::regex_program::create(pattern);
+    results   = cudf::strings::count_re(sv, *prog);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
   }
 }
 
@@ -467,14 +581,22 @@ TEST_F(StringsContainsTests, NegatedClasses)
   auto sv    = cudf::strings_column_view(input);
 
   {
-    auto results = cudf::strings::count_re(sv, "[^a-f]");
+    auto pattern = std::string("[^a-f]");
+    auto results = cudf::strings::count_re(sv, pattern);
     cudf::test::fixed_width_column_wrapper<int32_t> expected({1, 4, 0, 5, 3});
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+    auto prog = cudf::strings::regex_program::create(pattern);
+    results   = cudf::strings::count_re(sv, *prog);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
   }
   {
-    auto results = cudf::strings::count_re(sv, "[^a-eá-é]");
+    auto pattern = std::string("[^a-eá-é]");
+    auto results = cudf::strings::count_re(sv, pattern);
     cudf::test::fixed_width_column_wrapper<int32_t> expected({2, 5, 0, 1, 3});
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+    auto prog = cudf::strings::regex_program::create(pattern);
+    results   = cudf::strings::count_re(sv, *prog);
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
   }
 }
 
@@ -487,14 +609,18 @@ TEST_F(StringsContainsTests, IncompleteClassesRange)
     cudf::test::fixed_width_column_wrapper<bool> expected({1, 0, 0, 1, 1});
     auto results = cudf::strings::contains_re(sv, "[a-z]");
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
-    results = cudf::strings::contains_re(sv, "[a-m-z]");  // same as [a-z]
+
+    auto prog = cudf::strings::regex_program::create("[a-m-z]");  // same as [a-z]
+    results   = cudf::strings::contains_re(sv, *prog);
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
   }
   {
     cudf::test::fixed_width_column_wrapper<bool> expected({1, 1, 0, 1, 1});
     auto results = cudf::strings::contains_re(sv, "[g-]");
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
-    results = cudf::strings::contains_re(sv, "[-k]");
+
+    auto prog = cudf::strings::regex_program::create("[-k]");
+    results   = cudf::strings::contains_re(sv, *prog);
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
   }
   {
@@ -503,9 +629,12 @@ TEST_F(StringsContainsTests, IncompleteClassesRange)
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
     results = cudf::strings::contains_re(sv, "[+--]");
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
-    results = cudf::strings::contains_re(sv, "[a-c-]");
+
+    auto prog = cudf::strings::regex_program::create("[a-c-]");
+    results   = cudf::strings::contains_re(sv, *prog);
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
-    results = cudf::strings::contains_re(sv, "[-d-f]");
+    prog    = cudf::strings::regex_program::create("[-d-f]");
+    results = cudf::strings::contains_re(sv, *prog);
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
   }
 }
@@ -516,26 +645,43 @@ TEST_F(StringsContainsTests, MultiLine)
     cudf::test::strings_column_wrapper({"abc\nfff\nabc", "fff\nabc\nlll", "abc", "", "abc\n"});
   auto view = cudf::strings_column_view(input);
 
-  auto results = cudf::strings::contains_re(view, "^abc$", cudf::strings::regex_flags::MULTILINE);
+  auto pattern = std::string("^abc$");
+  auto prog    = cudf::strings::regex_program::create(pattern);
+  auto prog_ml =
+    cudf::strings::regex_program::create(pattern, cudf::strings::regex_flags::MULTILINE);
+
+  auto results = cudf::strings::contains_re(view, pattern, cudf::strings::regex_flags::MULTILINE);
   auto expected_contains = cudf::test::fixed_width_column_wrapper<bool>({1, 1, 1, 0, 1});
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_contains);
-  results           = cudf::strings::contains_re(view, "^abc$");
+  results = cudf::strings::contains_re(view, *prog_ml);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_contains);
+  results           = cudf::strings::contains_re(view, pattern);
   expected_contains = cudf::test::fixed_width_column_wrapper<bool>({0, 0, 1, 0, 0});
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_contains);
+  results = cudf::strings::contains_re(view, *prog);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_contains);
 
-  results = cudf::strings::matches_re(view, "^abc$", cudf::strings::regex_flags::MULTILINE);
+  results = cudf::strings::matches_re(view, pattern, cudf::strings::regex_flags::MULTILINE);
   auto expected_matches = cudf::test::fixed_width_column_wrapper<bool>({1, 0, 1, 0, 1});
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_matches);
-  results          = cudf::strings::matches_re(view, "^abc$");
+  results = cudf::strings::matches_re(view, *prog_ml);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_matches);
+  results          = cudf::strings::matches_re(view, pattern);
   expected_matches = cudf::test::fixed_width_column_wrapper<bool>({0, 0, 1, 0, 0});
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_matches);
+  results = cudf::strings::matches_re(view, *prog);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_matches);
 
-  results = cudf::strings::count_re(view, "^abc$", cudf::strings::regex_flags::MULTILINE);
+  results = cudf::strings::count_re(view, pattern, cudf::strings::regex_flags::MULTILINE);
   auto expected_count = cudf::test::fixed_width_column_wrapper<int32_t>({2, 1, 1, 0, 1});
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_count);
-  results        = cudf::strings::count_re(view, "^abc$");
+  results = cudf::strings::count_re(view, *prog_ml);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_count);
+  results        = cudf::strings::count_re(view, pattern);
   expected_count = cudf::test::fixed_width_column_wrapper<int32_t>({0, 0, 1, 0, 0});
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_count);
+  results = cudf::strings::count_re(view, *prog);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_count);
 }
 
 TEST_F(StringsContainsTests, DotAll)
@@ -543,31 +689,55 @@ TEST_F(StringsContainsTests, DotAll)
   auto input = cudf::test::strings_column_wrapper({"abc\nfa\nef", "fff\nabbc\nfff", "abcdef", ""});
   auto view  = cudf::strings_column_view(input);
 
-  auto results = cudf::strings::contains_re(view, "a.*f", cudf::strings::regex_flags::DOTALL);
+  auto pattern = std::string("a.*f");
+  auto prog    = cudf::strings::regex_program::create(pattern);
+  auto prog_dotall =
+    cudf::strings::regex_program::create(pattern, cudf::strings::regex_flags::DOTALL);
+
+  auto results = cudf::strings::contains_re(view, pattern, cudf::strings::regex_flags::DOTALL);
   auto expected_contains = cudf::test::fixed_width_column_wrapper<bool>({1, 1, 1, 0});
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_contains);
-  results           = cudf::strings::contains_re(view, "a.*f");
+  results = cudf::strings::contains_re(view, *prog_dotall);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_contains);
+  results           = cudf::strings::contains_re(view, pattern);
   expected_contains = cudf::test::fixed_width_column_wrapper<bool>({0, 0, 1, 0});
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_contains);
+  results = cudf::strings::contains_re(view, *prog);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_contains);
 
-  results = cudf::strings::matches_re(view, "a.*f", cudf::strings::regex_flags::DOTALL);
+  results = cudf::strings::matches_re(view, pattern, cudf::strings::regex_flags::DOTALL);
   auto expected_matches = cudf::test::fixed_width_column_wrapper<bool>({1, 0, 1, 0});
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_matches);
-  results          = cudf::strings::matches_re(view, "a.*f");
+  results = cudf::strings::matches_re(view, *prog_dotall);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_matches);
+  results          = cudf::strings::matches_re(view, pattern);
   expected_matches = cudf::test::fixed_width_column_wrapper<bool>({0, 0, 1, 0});
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_matches);
+  results = cudf::strings::matches_re(view, *prog);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_matches);
+
+  pattern     = std::string("a.*?f");
+  prog        = cudf::strings::regex_program::create(pattern);
+  prog_dotall = cudf::strings::regex_program::create(pattern, cudf::strings::regex_flags::DOTALL);
 
-  results             = cudf::strings::count_re(view, "a.*?f", cudf::strings::regex_flags::DOTALL);
+  results             = cudf::strings::count_re(view, pattern, cudf::strings::regex_flags::DOTALL);
   auto expected_count = cudf::test::fixed_width_column_wrapper<int32_t>({2, 1, 1, 0});
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_count);
-  results        = cudf::strings::count_re(view, "a.*?f");
+  results = cudf::strings::count_re(view, *prog_dotall);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_count);
+  results        = cudf::strings::count_re(view, pattern);
   expected_count = cudf::test::fixed_width_column_wrapper<int32_t>({0, 0, 1, 0});
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_count);
+  results = cudf::strings::count_re(view, *prog);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_count);
 
-  auto both_flags = cudf::strings::regex_flags::DOTALL | cudf::strings::regex_flags::MULTILINE;
-  results =
-    cudf::strings::count_re(view, "a.*?f", static_cast<cudf::strings::regex_flags>(both_flags));
-  expected_count = cudf::test::fixed_width_column_wrapper<int32_t>({2, 1, 1, 0});
+  auto both_flags = static_cast<cudf::strings::regex_flags>(cudf::strings::regex_flags::DOTALL |
+                                                            cudf::strings::regex_flags::MULTILINE);
+  results         = cudf::strings::count_re(view, pattern, both_flags);
+  expected_count  = cudf::test::fixed_width_column_wrapper<int32_t>({2, 1, 1, 0});
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_count);
+  auto prog_both = cudf::strings::regex_program::create(pattern, both_flags);
+  results        = cudf::strings::count_re(view, *prog_both);
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_count);
 }
 
@@ -586,9 +756,16 @@ TEST_F(StringsContainsTests, ASCII)
     auto results = cudf::strings::contains_re(view, ptn, cudf::strings::regex_flags::ASCII);
     auto expected_contains = cudf::test::fixed_width_column_wrapper<bool>({1, 0, 0, 0});
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_contains);
+    auto prog = cudf::strings::regex_program::create(ptn, cudf::strings::regex_flags::ASCII);
+    results   = cudf::strings::contains_re(view, *prog);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_contains);
+
     results           = cudf::strings::contains_re(view, ptn);
     expected_contains = cudf::test::fixed_width_column_wrapper<bool>({1, 1, 1, 1});
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_contains);
+    prog    = cudf::strings::regex_program::create(ptn);
+    results = cudf::strings::contains_re(view, *prog);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_contains);
   }
 }
 
diff --git a/cpp/tests/strings/extract_tests.cpp b/cpp/tests/strings/extract_tests.cpp
index e396ca42d6c..62d7ef2a418 100644
--- a/cpp/tests/strings/extract_tests.cpp
+++ b/cpp/tests/strings/extract_tests.cpp
@@ -21,6 +21,7 @@
 
 #include <cudf/detail/iterator.cuh>
 #include <cudf/strings/extract.hpp>
+#include <cudf/strings/regex/regex_program.hpp>
 #include <cudf/strings/strings_column_view.hpp>
 #include <cudf/table/table_view.hpp>
 
@@ -76,6 +77,10 @@ TEST_F(StringsExtractTests, ExtractTest)
   columns.push_back(expected2.release());
   cudf::table expected(std::move(columns));
   CUDF_TEST_EXPECT_TABLES_EQUAL(*results, expected);
+
+  auto prog = cudf::strings::regex_program::create(pattern);
+  results   = cudf::strings::extract(strings_view, *prog);  // exercise regex_program overload
+  CUDF_TEST_EXPECT_TABLES_EQUAL(*results, expected);
 }
 
 TEST_F(StringsExtractTests, ExtractDomainTest)
@@ -117,6 +122,10 @@ TEST_F(StringsExtractTests, ExtractDomainTest)
   });
   cudf::table_view expected{{expected1}};
   CUDF_TEST_EXPECT_TABLES_EQUAL(*results, expected);
+
+  auto prog = cudf::strings::regex_program::create(pattern);
+  results   = cudf::strings::extract(strings_view, *prog);
+  CUDF_TEST_EXPECT_TABLES_EQUAL(*results, expected);
 }
 
 TEST_F(StringsExtractTests, ExtractEventTest)
@@ -144,9 +153,13 @@ TEST_F(StringsExtractTests, ExtractEventTest)
                                       "Test Message Description"});
 
   for (std::size_t idx = 0; idx < patterns.size(); ++idx) {
-    auto results = cudf::strings::extract(strings_view, patterns[idx]);
+    auto pattern = patterns[idx];
+    auto results = cudf::strings::extract(strings_view, pattern);
     cudf::test::strings_column_wrapper expected({expecteds[idx]});
-    CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view().column(0), expected);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view().column(0), expected);
+    auto prog = cudf::strings::regex_program::create(pattern);
+    results   = cudf::strings::extract(strings_view, *prog);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view().column(0), expected);
   }
 }
 
@@ -156,15 +169,24 @@ TEST_F(StringsExtractTests, MultiLine)
     cudf::test::strings_column_wrapper({"abc\nfff\nabc", "fff\nabc\nlll", "abc", "", "abc\n"});
   auto view = cudf::strings_column_view(input);
 
-  auto results = cudf::strings::extract(view, "(^[a-c]+$)", cudf::strings::regex_flags::MULTILINE);
+  auto pattern = std::string("(^[a-c]+$)");
+  auto results = cudf::strings::extract(view, pattern, cudf::strings::regex_flags::MULTILINE);
   cudf::test::strings_column_wrapper expected_multiline({"abc", "abc", "abc", "", "abc"},
                                                         {1, 1, 1, 0, 1});
   auto expected = cudf::table_view{{expected_multiline}};
   CUDF_TEST_EXPECT_TABLES_EQUAL(*results, expected);
-  results = cudf::strings::extract(view, "^([a-c]+)$");
+  auto prog = cudf::strings::regex_program::create(pattern, cudf::strings::regex_flags::MULTILINE);
+  results   = cudf::strings::extract(view, *prog);
+  CUDF_TEST_EXPECT_TABLES_EQUAL(*results, expected);
+
+  pattern = std::string("^([a-c]+)$");
+  results = cudf::strings::extract(view, pattern);
   cudf::test::strings_column_wrapper expected_default({"", "", "abc", "", ""}, {0, 0, 1, 0, 0});
   expected = cudf::table_view{{expected_default}};
   CUDF_TEST_EXPECT_TABLES_EQUAL(*results, expected);
+  prog    = cudf::strings::regex_program::create(pattern);
+  results = cudf::strings::extract(view, *prog);
+  CUDF_TEST_EXPECT_TABLES_EQUAL(*results, expected);
 }
 
 TEST_F(StringsExtractTests, DotAll)
@@ -172,15 +194,23 @@ TEST_F(StringsExtractTests, DotAll)
   auto input = cudf::test::strings_column_wrapper({"abc\nfa\nef", "fff\nabbc\nfff", "abcdef", ""});
   auto view  = cudf::strings_column_view(input);
 
-  auto results = cudf::strings::extract(view, "(a.*f)", cudf::strings::regex_flags::DOTALL);
+  auto pattern = std::string("(a.*f)");
+  auto results = cudf::strings::extract(view, pattern, cudf::strings::regex_flags::DOTALL);
   cudf::test::strings_column_wrapper expected_dotall({"abc\nfa\nef", "abbc\nfff", "abcdef", ""},
                                                      {1, 1, 1, 0});
   auto expected = cudf::table_view{{expected_dotall}};
   CUDF_TEST_EXPECT_TABLES_EQUAL(*results, expected);
-  results = cudf::strings::extract(view, "(a.*f)");
+  auto prog = cudf::strings::regex_program::create(pattern, cudf::strings::regex_flags::DOTALL);
+  results   = cudf::strings::extract(view, *prog);
+  CUDF_TEST_EXPECT_TABLES_EQUAL(*results, expected);
+
+  results = cudf::strings::extract(view, pattern);
   cudf::test::strings_column_wrapper expected_default({"", "", "abcdef", ""}, {0, 0, 1, 0});
   expected = cudf::table_view{{expected_default}};
   CUDF_TEST_EXPECT_TABLES_EQUAL(*results, expected);
+  prog    = cudf::strings::regex_program::create(pattern);
+  results = cudf::strings::extract(view, *prog);
+  CUDF_TEST_EXPECT_TABLES_EQUAL(*results, expected);
 }
 
 TEST_F(StringsExtractTests, EmptyExtractTest)
@@ -192,7 +222,8 @@ TEST_F(StringsExtractTests, EmptyExtractTest)
     thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; }));
   auto strings_view = cudf::strings_column_view(strings);
 
-  auto results = cudf::strings::extract(strings_view, "([^_]*)\\Z");
+  auto pattern = std::string("([^_]*)\\Z");
+  auto results = cudf::strings::extract(strings_view, pattern);
 
   std::vector<const char*> h_expected{nullptr, "AAA", "A", "", "", ""};
   cudf::test::strings_column_wrapper expected(
@@ -203,6 +234,9 @@ TEST_F(StringsExtractTests, EmptyExtractTest)
   columns.push_back(expected.release());
   cudf::table table_expected(std::move(columns));
   CUDF_TEST_EXPECT_TABLES_EQUAL(*results, table_expected);
+  auto prog = cudf::strings::regex_program::create(pattern);
+  results   = cudf::strings::extract(strings_view, *prog);
+  CUDF_TEST_EXPECT_TABLES_EQUAL(*results, table_expected);
 }
 
 TEST_F(StringsExtractTests, ExtractAllTest)
@@ -214,7 +248,8 @@ TEST_F(StringsExtractTests, ExtractAllTest)
   cudf::test::strings_column_wrapper input(h_input.begin(), h_input.end(), validity);
   auto sv = cudf::strings_column_view(input);
 
-  auto results = cudf::strings::extract_all_record(sv, "(\\d+) (\\w+)");
+  auto pattern = std::string("(\\d+) (\\w+)");
+  auto results = cudf::strings::extract_all_record(sv, pattern);
 
   bool valids[] = {true, true, true, false, false, false, true};
   using LCW     = cudf::test::lists_column_wrapper<cudf::string_view>;
@@ -226,15 +261,24 @@ TEST_F(StringsExtractTests, ExtractAllTest)
                 LCW{},
                 LCW{"4", "pare"}},
                valids);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected);
+  auto prog = cudf::strings::regex_program::create(pattern);
+  results   = cudf::strings::extract_all_record(sv, *prog);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected);
 }
 
 TEST_F(StringsExtractTests, Errors)
 {
   cudf::test::strings_column_wrapper input({"this column intentionally left blank"});
   auto sv = cudf::strings_column_view(input);
-  EXPECT_THROW(cudf::strings::extract(sv, "\\w+"), cudf::logic_error);
-  EXPECT_THROW(cudf::strings::extract_all_record(sv, "\\w+"), cudf::logic_error);
+
+  auto pattern = std::string("\\w+");
+  auto prog    = cudf::strings::regex_program::create(pattern);
+
+  EXPECT_THROW(cudf::strings::extract(sv, pattern), cudf::logic_error);
+  EXPECT_THROW(cudf::strings::extract(sv, *prog), cudf::logic_error);
+  EXPECT_THROW(cudf::strings::extract_all_record(sv, pattern), cudf::logic_error);
+  EXPECT_THROW(cudf::strings::extract_all_record(sv, *prog), cudf::logic_error);
 }
 
 TEST_F(StringsExtractTests, MediumRegex)
diff --git a/cpp/tests/strings/findall_tests.cpp b/cpp/tests/strings/findall_tests.cpp
index 1dd088cb70f..6428be28e0a 100644
--- a/cpp/tests/strings/findall_tests.cpp
+++ b/cpp/tests/strings/findall_tests.cpp
@@ -20,6 +20,7 @@
 #include <cudf_test/table_utilities.hpp>
 
 #include <cudf/strings/findall.hpp>
+#include <cudf/strings/regex/regex_program.hpp>
 #include <cudf/strings/strings_column_view.hpp>
 
 #include <thrust/iterator/transform_iterator.h>
@@ -35,8 +36,10 @@ TEST_F(StringsFindallTests, FindallTest)
   cudf::test::strings_column_wrapper input(
     {"3-A", "4-May 5-Day 6-Hay", "12-Dec-2021-Jan", "Feb-March", "4 ABC", "", "", "25-9000-Hal"},
     valids);
+  auto sv = cudf::strings_column_view(input);
 
-  auto results = cudf::strings::findall(cudf::strings_column_view(input), "(\\d+)-(\\w+)");
+  auto pattern = std::string("(\\d+)-(\\w+)");
+  auto results = cudf::strings::findall(sv, pattern);
 
   using LCW = cudf::test::lists_column_wrapper<cudf::string_view>;
   LCW expected({LCW{"3-A"},
@@ -49,6 +52,9 @@ TEST_F(StringsFindallTests, FindallTest)
                 LCW{"25-9000"}},
                valids);
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected);
+  auto prog = cudf::strings::regex_program::create(pattern);
+  results   = cudf::strings::findall(sv, *prog);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected);
 }
 
 TEST_F(StringsFindallTests, Multiline)
@@ -56,10 +62,14 @@ TEST_F(StringsFindallTests, Multiline)
   cudf::test::strings_column_wrapper input({"abc\nfff\nabc", "fff\nabc\nlll", "abc", "", "abc\n"});
   auto view = cudf::strings_column_view(input);
 
-  auto results = cudf::strings::findall(view, "(^abc$)", cudf::strings::regex_flags::MULTILINE);
+  auto pattern = std::string("(^abc$)");
+  auto results = cudf::strings::findall(view, pattern, cudf::strings::regex_flags::MULTILINE);
   using LCW    = cudf::test::lists_column_wrapper<cudf::string_view>;
   LCW expected({LCW{"abc", "abc"}, LCW{"abc"}, LCW{"abc"}, LCW{}, LCW{"abc"}});
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected);
+  auto prog = cudf::strings::regex_program::create(pattern, cudf::strings::regex_flags::MULTILINE);
+  results   = cudf::strings::findall(view, *prog);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected);
 }
 
 TEST_F(StringsFindallTests, DotAll)
@@ -67,10 +77,14 @@ TEST_F(StringsFindallTests, DotAll)
   cudf::test::strings_column_wrapper input({"abc\nfa\nef", "fff\nabbc\nfff", "abcdef", ""});
   auto view = cudf::strings_column_view(input);
 
-  auto results = cudf::strings::findall(view, "(b.*f)", cudf::strings::regex_flags::DOTALL);
+  auto pattern = std::string("(b.*f)");
+  auto results = cudf::strings::findall(view, pattern, cudf::strings::regex_flags::DOTALL);
   using LCW    = cudf::test::lists_column_wrapper<cudf::string_view>;
   LCW expected({LCW{"bc\nfa\nef"}, LCW{"bbc\nfff"}, LCW{"bcdef"}, LCW{}});
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected);
+  auto prog = cudf::strings::regex_program::create(pattern, cudf::strings::regex_flags::DOTALL);
+  results   = cudf::strings::findall(view, *prog);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected);
 }
 
 TEST_F(StringsFindallTests, MediumRegex)
diff --git a/cpp/tests/strings/replace_regex_tests.cpp b/cpp/tests/strings/replace_regex_tests.cpp
index 6280463d112..840d998e56c 100644
--- a/cpp/tests/strings/replace_regex_tests.cpp
+++ b/cpp/tests/strings/replace_regex_tests.cpp
@@ -17,7 +17,9 @@
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/iterator_utilities.hpp>
 
+#include <cudf/strings/regex/regex_program.hpp>
 #include <cudf/strings/replace_re.hpp>
 #include <cudf/strings/strings_column_view.hpp>
 
@@ -39,9 +41,7 @@ TEST_F(StringsReplaceRegexTest, ReplaceRegexTest)
                                      nullptr};
 
   cudf::test::strings_column_wrapper strings(
-    h_strings.begin(),
-    h_strings.end(),
-    thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; }));
+    h_strings.begin(), h_strings.end(), cudf::test::iterators::nulls_from_nullptrs(h_strings));
   auto strings_view = cudf::strings_column_view(strings);
 
   std::vector<const char*> h_expected{"= quick brown fox jumps over = lazy dog",
@@ -52,13 +52,15 @@ TEST_F(StringsReplaceRegexTest, ReplaceRegexTest)
                                       "",
                                       nullptr};
 
-  std::string pattern = "(\\bthe\\b)";
-  auto results        = cudf::strings::replace_re(strings_view, pattern, cudf::string_scalar("="));
+  auto pattern = std::string("(\\bthe\\b)");
+  auto repl    = cudf::string_scalar("=");
+  auto results = cudf::strings::replace_re(strings_view, pattern, repl);
   cudf::test::strings_column_wrapper expected(
-    h_expected.begin(),
-    h_expected.end(),
-    thrust::make_transform_iterator(h_expected.begin(), [](auto str) { return str != nullptr; }));
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+    h_expected.begin(), h_expected.end(), cudf::test::iterators::nulls_from_nullptrs(h_expected));
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
+  auto prog = cudf::strings::regex_program::create(pattern);
+  results   = cudf::strings::replace_re(strings_view, *prog, repl);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
 }
 
 TEST_F(StringsReplaceRegexTest, ReplaceMultiRegexTest)
@@ -72,9 +74,7 @@ TEST_F(StringsReplaceRegexTest, ReplaceMultiRegexTest)
                                      nullptr};
 
   cudf::test::strings_column_wrapper strings(
-    h_strings.begin(),
-    h_strings.end(),
-    thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; }));
+    h_strings.begin(), h_strings.end(), cudf::test::iterators::nulls_from_nullptrs(h_strings));
   auto strings_view = cudf::strings_column_view(strings);
 
   std::vector<const char*> h_expected{" quick brown fox jumps over  lazy dog",
@@ -91,101 +91,132 @@ TEST_F(StringsReplaceRegexTest, ReplaceMultiRegexTest)
   auto repls_view = cudf::strings_column_view(repls);
   auto results    = cudf::strings::replace_re(strings_view, patterns, repls_view);
   cudf::test::strings_column_wrapper expected(
-    h_expected.begin(),
-    h_expected.end(),
-    thrust::make_transform_iterator(h_expected.begin(), [](auto str) { return str != nullptr; }));
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+    h_expected.begin(), h_expected.end(), cudf::test::iterators::nulls_from_nullptrs(h_expected));
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
 }
 
 TEST_F(StringsReplaceRegexTest, InvalidRegex)
 {
-  cudf::test::strings_column_wrapper strings(
-    {"abc*def|ghi+jkl", ""});  // these do not really matter
-  auto strings_view = cudf::strings_column_view(strings);
-
   // these are quantifiers that do not have a preceding character/class
-  EXPECT_THROW(cudf::strings::replace_re(strings_view, "*", cudf::string_scalar("")),
-               cudf::logic_error);
-  EXPECT_THROW(cudf::strings::replace_re(strings_view, "|", cudf::string_scalar("")),
-               cudf::logic_error);
-  EXPECT_THROW(cudf::strings::replace_re(strings_view, "+", cudf::string_scalar("")),
-               cudf::logic_error);
-  EXPECT_THROW(cudf::strings::replace_re(strings_view, "ab(*)", cudf::string_scalar("")),
-               cudf::logic_error);
-  EXPECT_THROW(cudf::strings::replace_re(strings_view, "\\", cudf::string_scalar("")),
-               cudf::logic_error);
-  EXPECT_THROW(cudf::strings::replace_re(strings_view, "\\p", cudf::string_scalar("")),
-               cudf::logic_error);
+  EXPECT_THROW(cudf::strings::regex_program::create("*"), cudf::logic_error);
+  EXPECT_THROW(cudf::strings::regex_program::create("|"), cudf::logic_error);
+  EXPECT_THROW(cudf::strings::regex_program::create("+"), cudf::logic_error);
+  EXPECT_THROW(cudf::strings::regex_program::create("ab(*)"), cudf::logic_error);
+  EXPECT_THROW(cudf::strings::regex_program::create("\\"), cudf::logic_error);   // lone backslash
+  EXPECT_THROW(cudf::strings::regex_program::create("\\p"), cudf::logic_error);  // backslash + p
 }
 
 TEST_F(StringsReplaceRegexTest, WithEmptyPattern)
 {
   std::vector<const char*> h_strings{"asd", "xcv"};
   cudf::test::strings_column_wrapper strings(
-    h_strings.begin(),
-    h_strings.end(),
-    thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; }));
+    h_strings.begin(), h_strings.end(), cudf::test::iterators::nulls_from_nullptrs(h_strings));
   auto strings_view = cudf::strings_column_view(strings);
-  std::vector<std::string> patterns({""});
+
+  auto empty_pattern = std::string("");
+  auto repl          = cudf::string_scalar("bbb");
+  std::vector<std::string> patterns({empty_pattern});
   cudf::test::strings_column_wrapper repls({"bbb"});
   auto repls_view = cudf::strings_column_view(repls);
   auto results    = cudf::strings::replace_re(strings_view, patterns, repls_view);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, strings);
-  results = cudf::strings::replace_re(strings_view, "", cudf::string_scalar("bbb"));
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, strings);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, strings);
+  results = cudf::strings::replace_re(strings_view, empty_pattern, repl);  // single pattern
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, strings);
+  auto prog = cudf::strings::regex_program::create(empty_pattern);  // same empty pattern
+  results   = cudf::strings::replace_re(strings_view, *prog, repl);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, strings);
 }
 
 TEST_F(StringsReplaceRegexTest, MultiReplacement)
 {
   cudf::test::strings_column_wrapper input({"aba bcd aba", "abababa abababa"});
-  auto results =
-    cudf::strings::replace_re(cudf::strings_column_view(input), "aba", cudf::string_scalar("_"), 2);
+  auto sv = cudf::strings_column_view(input);
+
+  auto pattern = std::string("aba");
+  auto repl    = cudf::string_scalar("_");
+  auto results = cudf::strings::replace_re(sv, pattern, repl, 2);  // max 2 replacements per row
   cudf::test::strings_column_wrapper expected({"_ bcd _", "_b_ abababa"});
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
-  results =
-    cudf::strings::replace_re(cudf::strings_column_view(input), "aba", cudf::string_scalar(""), 0);
+  auto prog = cudf::strings::regex_program::create(pattern);
+  results   = cudf::strings::replace_re(sv, *prog, repl, 2);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
+
+  results = cudf::strings::replace_re(sv, pattern, repl, 0);  // count 0: rows unchanged
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, input);
+  results = cudf::strings::replace_re(sv, *prog, repl, 0);
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, input);
 }
 
 TEST_F(StringsReplaceRegexTest, WordBoundary)
 {
   cudf::test::strings_column_wrapper input({"aba bcd\naba", "zéz", "A1B2-é3", "e é", "_", "a_b"});
-  auto results =
-    cudf::strings::replace_re(cudf::strings_column_view(input), "\\b", cudf::string_scalar("X"));
+  auto sv = cudf::strings_column_view(input);
+
+  auto pattern  = std::string("\\b");
+  auto repl     = cudf::string_scalar("X");
+  auto results  = cudf::strings::replace_re(sv, pattern, repl);
   auto expected = cudf::test::strings_column_wrapper(
     {"XabaX XbcdX\nXabaX", "XzézX", "XA1B2X-Xé3X", "XeX XéX", "X_X", "Xa_bX"});
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
-  results =
-    cudf::strings::replace_re(cudf::strings_column_view(input), "\\B", cudf::string_scalar("X"));
+  auto prog = cudf::strings::regex_program::create(pattern);  // program built from same pattern
+  results   = cudf::strings::replace_re(sv, *prog, repl);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
+
+  pattern  = std::string("\\B");
+  results  = cudf::strings::replace_re(sv, pattern, repl);
   expected = cudf::test::strings_column_wrapper(
     {"aXbXa bXcXd\naXbXa", "zXéXz", "AX1XBX2-éX3", "e é", "_", "aX_Xb"});
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
+  prog    = cudf::strings::regex_program::create(pattern);  // rebuilt: pattern changed above
+  results = cudf::strings::replace_re(sv, *prog, repl);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
 }
 
 TEST_F(StringsReplaceRegexTest, Alternation)
 {
   cudf::test::strings_column_wrapper input(
     {"16  6  brr  232323  1  hello  90", "123 ABC 00 2022", "abé123  4567  89xyz"});
-  auto results = cudf::strings::replace_re(
-    cudf::strings_column_view(input), "(^|\\s)\\d+(\\s|$)", cudf::string_scalar("_"));
+  auto sv = cudf::strings_column_view(input);
+
+  auto pattern = std::string("(^|\\s)\\d+(\\s|$)");
+  auto repl    = cudf::string_scalar("_");
+  auto results = cudf::strings::replace_re(sv, pattern, repl);
   auto expected =
     cudf::test::strings_column_wrapper({"__ brr __ hello _", "_ABC_2022", "abé123 _ 89xyz"});
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
-  results = cudf::strings::replace_re(
-    cudf::strings_column_view(input), "(\\s|^)\\d+($|\\s)", cudf::string_scalar("_"));
+  auto prog = cudf::strings::regex_program::create(pattern);
+  results   = cudf::strings::replace_re(sv, *prog, repl);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
+
+  pattern = std::string("(\\s|^)\\d+($|\\s)");  // alternatives swapped; same expected
+  results = cudf::strings::replace_re(sv, pattern, repl);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
+  prog    = cudf::strings::regex_program::create(pattern);  // rebuilt for the new pattern
+  results = cudf::strings::replace_re(sv, *prog, repl);
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
 }
 
 TEST_F(StringsReplaceRegexTest, ZeroLengthMatch)
 {
   cudf::test::strings_column_wrapper input({"DD", "zéz", "DsDs", ""});
+  auto sv = cudf::strings_column_view(input);
+
+  auto pattern  = std::string("D*");  // can match the empty string
   auto repl     = cudf::string_scalar("_");
-  auto results  = cudf::strings::replace_re(cudf::strings_column_view(input), "D*", repl);
+  auto results  = cudf::strings::replace_re(sv, pattern, repl);
   auto expected = cudf::test::strings_column_wrapper({"__", "_z_é_z_", "__s__s_", "_"});
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
-  results  = cudf::strings::replace_re(cudf::strings_column_view(input), "D?s?", repl);
+  auto prog = cudf::strings::regex_program::create(pattern);
+  results   = cudf::strings::replace_re(sv, *prog, repl);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
+
+  pattern  = std::string("D?s?");
+  results  = cudf::strings::replace_re(sv, pattern, repl);
   expected = cudf::test::strings_column_wrapper({"___", "_z_é_z_", "___", "_"});
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
+  prog    = cudf::strings::regex_program::create(pattern);  // rebuilt for the new pattern
+  results = cudf::strings::replace_re(sv, *prog, repl);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
 }
 
 TEST_F(StringsReplaceRegexTest, Multiline)
@@ -196,14 +227,21 @@ TEST_F(StringsReplaceRegexTest, Multiline)
   auto sv = cudf::strings_column_view(input);
 
   // single-replace
-  auto results =
-    cudf::strings::replace_re(sv, "^aba$", cudf::string_scalar("_"), std::nullopt, multiline);
+  auto pattern = std::string("^aba$");
+  auto repl    = cudf::string_scalar("_");
+  auto results = cudf::strings::replace_re(sv, pattern, repl, std::nullopt, multiline);
   cudf::test::strings_column_wrapper expected_ml({"bcd\n_\nefg", "_\naba abab\n_", "_"});
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_ml);
+  auto prog = cudf::strings::regex_program::create(pattern, multiline);
+  results   = cudf::strings::replace_re(sv, *prog, repl);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected_ml);
 
-  results = cudf::strings::replace_re(sv, "^aba$", cudf::string_scalar("_"));
+  results = cudf::strings::replace_re(sv, pattern, repl);
   cudf::test::strings_column_wrapper expected({"bcd\naba\nefg", "aba\naba abab\naba", "_"});
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
+  prog    = cudf::strings::regex_program::create(pattern);
+  results = cudf::strings::replace_re(sv, *prog, repl);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
 
   // multi-replace
   std::vector<std::string> patterns({"aba$", "^aba"});
@@ -217,15 +255,23 @@ TEST_F(StringsReplaceRegexTest, Multiline)
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, multi_expected);
 
   // backref-replace
-  results = cudf::strings::replace_with_backrefs(sv, "(^aba)", "[\\1]", multiline);
+  auto repl_template = std::string("[\\1]");
+  pattern            = std::string("(^aba)");
+  results            = cudf::strings::replace_with_backrefs(sv, pattern, repl_template, multiline);
   cudf::test::strings_column_wrapper br_expected_ml(
     {"bcd\n[aba]\nefg", "[aba]\n[aba] abab\n[aba]", "[aba]"});
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, br_expected_ml);
+  prog    = cudf::strings::regex_program::create(pattern, multiline);
+  results = cudf::strings::replace_with_backrefs(sv, *prog, repl_template);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, br_expected_ml);
 
-  results = cudf::strings::replace_with_backrefs(sv, "(^aba)", "[\\1]");
+  results = cudf::strings::replace_with_backrefs(sv, pattern, repl_template);
   cudf::test::strings_column_wrapper br_expected(
     {"bcd\naba\nefg", "[aba]\naba abab\naba", "[aba]"});
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, br_expected);
+  prog    = cudf::strings::regex_program::create(pattern);
+  results = cudf::strings::replace_with_backrefs(sv, *prog, repl_template);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, br_expected);
 }
 
 TEST_F(StringsReplaceRegexTest, ReplaceBackrefsRegexTest)
@@ -239,10 +285,8 @@ TEST_F(StringsReplaceRegexTest, ReplaceBackrefsRegexTest)
                                      nullptr};
 
   cudf::test::strings_column_wrapper strings(
-    h_strings.begin(),
-    h_strings.end(),
-    thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; }));
-  auto strings_view = cudf::strings_column_view(strings);
+    h_strings.begin(), h_strings.end(), cudf::test::iterators::nulls_from_nullptrs(h_strings));
+  auto sv = cudf::strings_column_view(strings);
 
   std::vector<const char*> h_expected{"the-quick-brown-fox-jumps-over-the-lazy-dog",
                                       "the-fat-cat-lays-next-to-the-other-accénted-cat",
@@ -252,38 +296,43 @@ TEST_F(StringsReplaceRegexTest, ReplaceBackrefsRegexTest)
                                       "",
                                       nullptr};
 
-  std::string pattern       = "(\\w) (\\w)";
-  std::string repl_template = "\\1-\\2";
-  auto results = cudf::strings::replace_with_backrefs(strings_view, pattern, repl_template);
+  auto pattern       = std::string("(\\w) (\\w)");
+  auto repl_template = std::string("\\1-\\2");
+  auto results       = cudf::strings::replace_with_backrefs(sv, pattern, repl_template);
   cudf::test::strings_column_wrapper expected(
-    h_expected.begin(),
-    h_expected.end(),
-    thrust::make_transform_iterator(h_expected.begin(), [](auto str) { return str != nullptr; }));
+    h_expected.begin(), h_expected.end(), cudf::test::iterators::nulls_from_nullptrs(h_expected));
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+  auto prog = cudf::strings::regex_program::create(pattern);
+  results   = cudf::strings::replace_with_backrefs(sv, *prog, repl_template);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
 }
 
 TEST_F(StringsReplaceRegexTest, ReplaceBackrefsRegexAltIndexPatternTest)
 {
-  cudf::test::strings_column_wrapper strings({"12-3 34-5 67-89", "0-99: 777-888:: 5673-0"});
-  auto strings_view = cudf::strings_column_view(strings);
+  cudf::test::strings_column_wrapper input({"12-3 34-5 67-89", "0-99: 777-888:: 5673-0"});
+  auto sv = cudf::strings_column_view(input);
 
-  std::string pattern       = "(\\d+)-(\\d+)";
-  std::string repl_template = "${2} X ${1}0";
-  auto results = cudf::strings::replace_with_backrefs(strings_view, pattern, repl_template);
+  auto pattern       = std::string("(\\d+)-(\\d+)");
+  auto repl_template = std::string("${2} X ${1}0");  // ${N} refers to capture group N
+  auto results       = cudf::strings::replace_with_backrefs(sv, pattern, repl_template);
 
   cudf::test::strings_column_wrapper expected(
     {"3 X 120 5 X 340 89 X 670", "99 X 00: 888 X 7770:: 0 X 56730"});
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+  auto prog = cudf::strings::regex_program::create(pattern);  // program built from same pattern
+  results   = cudf::strings::replace_with_backrefs(sv, *prog, repl_template);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
 }
 
 TEST_F(StringsReplaceRegexTest, ReplaceBackrefsRegexReversedTest)
 {
   cudf::test::strings_column_wrapper strings(
     {"A543", "Z756", "", "tést-string", "two-thréé four-fivé", "abcd-éfgh", "tést-string-again"});
-  auto strings_view         = cudf::strings_column_view(strings);
-  std::string pattern       = "([a-z])-([a-zé])";
-  std::string repl_template = "X\\2+\\1Z";
-  auto results = cudf::strings::replace_with_backrefs(strings_view, pattern, repl_template);
+  auto sv = cudf::strings_column_view(strings);
+
+  auto pattern       = std::string("([a-z])-([a-zé])");
+  auto repl_template = std::string("X\\2+\\1Z");
+  auto results       = cudf::strings::replace_with_backrefs(sv, pattern, repl_template);
 
   cudf::test::strings_column_wrapper expected({"A543",
                                                "Z756",
@@ -293,33 +342,45 @@ TEST_F(StringsReplaceRegexTest, ReplaceBackrefsRegexReversedTest)
                                                "abcXé+dZfgh",
                                                "tésXs+tZtrinXa+gZgain"});
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+  auto prog = cudf::strings::regex_program::create(pattern);
+  results   = cudf::strings::replace_with_backrefs(sv, *prog, repl_template);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
 }
 
 TEST_F(StringsReplaceRegexTest, BackrefWithGreedyQuantifier)
 {
   cudf::test::strings_column_wrapper input(
     {"<h1>title</h1><h2>ABC</h2>", "<h1>1234567</h1><h2>XYZ</h2>"});
-  std::string replacement = "<h2>\\1</h2><p>\\2</p>";
+  auto sv = cudf::strings_column_view(input);
+
+  auto pattern       = std::string("<h1>(.*)</h1><h2>(.*)</h2>");
+  auto repl_template = std::string("<h2>\\1</h2><p>\\2</p>");
 
-  auto results = cudf::strings::replace_with_backrefs(
-    cudf::strings_column_view(input), "<h1>(.*)</h1><h2>(.*)</h2>", replacement);
+  auto results = cudf::strings::replace_with_backrefs(sv, pattern, repl_template);
   cudf::test::strings_column_wrapper expected(
     {"<h2>title</h2><p>ABC</p>", "<h2>1234567</h2><p>XYZ</p>"});
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
+  auto prog = cudf::strings::regex_program::create(pattern);
+  results   = cudf::strings::replace_with_backrefs(sv, *prog, repl_template);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
 
-  results = cudf::strings::replace_with_backrefs(
-    cudf::strings_column_view(input), "<h1>([a-z\\d]+)</h1><h2>([A-Z]+)</h2>", replacement);
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+  pattern = std::string("<h1>([a-z\\d]+)</h1><h2>([A-Z]+)</h2>");
+  results = cudf::strings::replace_with_backrefs(sv, pattern, repl_template);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
+  prog    = cudf::strings::regex_program::create(pattern);  // rebuilt for the new pattern
+  results = cudf::strings::replace_with_backrefs(sv, *prog, repl_template);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
 }
 
 TEST_F(StringsReplaceRegexTest, ReplaceBackrefsRegexZeroIndexTest)
 {
   cudf::test::strings_column_wrapper strings(
     {"TEST123", "TEST1TEST2", "TEST2-TEST1122", "TEST1-TEST-T", "TES3"});
-  auto strings_view         = cudf::strings_column_view(strings);
-  std::string pattern       = "(TEST)(\\d+)";
-  std::string repl_template = "${0}: ${1}, ${2}; ";
-  auto results = cudf::strings::replace_with_backrefs(strings_view, pattern, repl_template);
+  auto sv = cudf::strings_column_view(strings);
+
+  auto pattern       = std::string("(TEST)(\\d+)");
+  auto repl_template = std::string("${0}: ${1}, ${2}; ");
+  auto results       = cudf::strings::replace_with_backrefs(sv, pattern, repl_template);
 
   cudf::test::strings_column_wrapper expected({
     "TEST123: TEST, 123; ",
@@ -329,6 +390,9 @@ TEST_F(StringsReplaceRegexTest, ReplaceBackrefsRegexZeroIndexTest)
     "TES3",
   });
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+  auto prog = cudf::strings::regex_program::create(pattern);
+  results   = cudf::strings::replace_with_backrefs(sv, *prog, repl_template);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected);
 }
 
 TEST_F(StringsReplaceRegexTest, ReplaceBackrefsRegexErrorTest)
diff --git a/cpp/tests/strings/split_tests.cpp b/cpp/tests/strings/split_tests.cpp
index c7bbce263f3..73d5adab427 100644
--- a/cpp/tests/strings/split_tests.cpp
+++ b/cpp/tests/strings/split_tests.cpp
@@ -14,20 +14,21 @@
  * limitations under the License.
  */
 
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_utilities.hpp>
+#include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/iterator_utilities.hpp>
+#include <cudf_test/table_utilities.hpp>
+
 #include <cudf/column/column_factories.hpp>
 #include <cudf/scalar/scalar.hpp>
+#include <cudf/strings/regex/regex_program.hpp>
 #include <cudf/strings/split/partition.hpp>
 #include <cudf/strings/split/split.hpp>
 #include <cudf/strings/split/split_re.hpp>
 #include <cudf/strings/strings_column_view.hpp>
 #include <cudf/table/table.hpp>
 
-#include <cudf_test/base_fixture.hpp>
-#include <cudf_test/column_utilities.hpp>
-#include <cudf_test/column_wrapper.hpp>
-#include <cudf_test/iterator_utilities.hpp>
-#include <cudf_test/table_utilities.hpp>
-
 #include <thrust/iterator/transform_iterator.h>
 
 #include <vector>
@@ -316,21 +317,28 @@ TEST_F(StringsSplitTest, SplitRegex)
   auto sv = cudf::strings_column_view(input);
 
   {
-    auto result = cudf::strings::split_re(sv, "\\s+");
+    auto pattern = std::string("\\s+");
+    auto result  = cudf::strings::split_re(sv, pattern);
 
     cudf::test::strings_column_wrapper col0({"", "", "are", "tést", ""}, validity);
     cudf::test::strings_column_wrapper col1({"Héllo", "", "some", "String", ""}, {1, 0, 1, 1, 0});
     cudf::test::strings_column_wrapper col2({"thesé", "", "", "", ""}, {1, 0, 1, 0, 0});
     auto expected = cudf::table_view({col0, col1, col2});
     CUDF_TEST_EXPECT_TABLES_EQUIVALENT(result->view(), expected);
+    auto prog = cudf::strings::regex_program::create(pattern);
+    result    = cudf::strings::split_re(sv, *prog);
+    CUDF_TEST_EXPECT_TABLES_EQUIVALENT(result->view(), expected);
 
     // rsplit == split when using default parameters
-    result = cudf::strings::rsplit_re(sv, "\\s+");
+    result = cudf::strings::rsplit_re(sv, pattern);
+    CUDF_TEST_EXPECT_TABLES_EQUIVALENT(result->view(), expected);
+    result = cudf::strings::rsplit_re(sv, *prog);
     CUDF_TEST_EXPECT_TABLES_EQUIVALENT(result->view(), expected);
   }
 
   {
-    auto result = cudf::strings::split_re(sv, "[eé]");
+    auto pattern = std::string("[eé]");
+    auto result  = cudf::strings::split_re(sv, pattern);
 
     cudf::test::strings_column_wrapper col0({" H", "", "ar", "t", ""}, validity);
     cudf::test::strings_column_wrapper col1({"llo th", "", " som", "st String", ""},
@@ -339,9 +347,14 @@ TEST_F(StringsSplitTest, SplitRegex)
     cudf::test::strings_column_wrapper col3({"", "", "", "", ""}, {1, 0, 0, 0, 0});
     auto expected = cudf::table_view({col0, col1, col2, col3});
     CUDF_TEST_EXPECT_TABLES_EQUIVALENT(result->view(), expected);
+    auto prog = cudf::strings::regex_program::create(pattern);
+    result    = cudf::strings::split_re(sv, *prog);
+    CUDF_TEST_EXPECT_TABLES_EQUIVALENT(result->view(), expected);
 
     // rsplit == split when using default parameters
-    result = cudf::strings::rsplit_re(sv, "[eé]");
+    result = cudf::strings::rsplit_re(sv, pattern);
+    CUDF_TEST_EXPECT_TABLES_EQUIVALENT(result->view(), expected);
+    result = cudf::strings::rsplit_re(sv, *prog);
     CUDF_TEST_EXPECT_TABLES_EQUIVALENT(result->view(), expected);
   }
 }
@@ -356,20 +369,27 @@ TEST_F(StringsSplitTest, SplitRecordRegex)
 
   using LCW = cudf::test::lists_column_wrapper<cudf::string_view>;
   {
-    auto result = cudf::strings::split_record_re(sv, "\\s+");
+    auto pattern = std::string("\\s+");
+    auto result  = cudf::strings::split_record_re(sv, pattern);
 
     LCW expected(
       {LCW{"", "Héllo", "thesé"}, LCW{}, LCW{"are", "some", ""}, LCW{"tést", "String"}, LCW{""}},
       validity);
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected);
+    auto prog = cudf::strings::regex_program::create(pattern);
+    result    = cudf::strings::split_record_re(sv, *prog);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected);
 
     // rsplit == split when using default parameters
-    result = cudf::strings::rsplit_record_re(sv, "\\s+");
+    result = cudf::strings::rsplit_record_re(sv, pattern);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected);
+    result = cudf::strings::rsplit_record_re(sv, *prog);
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected);
   }
 
   {
-    auto result = cudf::strings::split_record_re(sv, "[eé]");
+    auto pattern = std::string("[eé]");
+    auto result  = cudf::strings::split_record_re(sv, pattern);
 
     LCW expected({LCW{" H", "llo th", "s", ""},
                   LCW{},
@@ -378,9 +398,14 @@ TEST_F(StringsSplitTest, SplitRecordRegex)
                   LCW{""}},
                  validity);
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected);
+    auto prog = cudf::strings::regex_program::create(pattern);
+    result    = cudf::strings::split_record_re(sv, *prog);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected);
 
     // rsplit == split when using default parameters
-    result = cudf::strings::rsplit_record_re(sv, "[eé]");
+    result = cudf::strings::rsplit_record_re(sv, pattern);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected);
+    result = cudf::strings::rsplit_record_re(sv, *prog);
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected);
   }
 }
@@ -393,37 +418,51 @@ TEST_F(StringsSplitTest, SplitRegexWithMaxSplit)
   cudf::test::strings_column_wrapper input(h_strings.begin(), h_strings.end(), validity);
   auto sv = cudf::strings_column_view(input);
   {
-    auto result = cudf::strings::split_re(sv, "\\s+", 1);
+    auto pattern = std::string("\\s+");
+    auto result  = cudf::strings::split_re(sv, pattern, 1);
 
     cudf::test::strings_column_wrapper col0({"", "", "are", "tést", ""}, {1, 0, 1, 1, 1});
     cudf::test::strings_column_wrapper col1({"Héllo\tthesé", "", "some  ", "String", ""},
                                             {1, 0, 1, 1, 0});
     auto expected = cudf::table_view({col0, col1});
     CUDF_TEST_EXPECT_TABLES_EQUIVALENT(result->view(), expected);
+    auto prog = cudf::strings::regex_program::create(pattern);
+    result    = cudf::strings::split_re(sv, *prog, 1);
+    CUDF_TEST_EXPECT_TABLES_EQUIVALENT(result->view(), expected);
 
     // split everything is the same output as maxsplit==2 for the test input column here
-    result         = cudf::strings::split_re(sv, "\\s+", 2);
-    auto expected2 = cudf::strings::split_re(sv, "\\s+");
+    result         = cudf::strings::split_re(sv, pattern, 2);
+    auto expected2 = cudf::strings::split_re(sv, pattern);
+    CUDF_TEST_EXPECT_TABLES_EQUIVALENT(result->view(), expected2->view());
+    result = cudf::strings::split_re(sv, *prog, 2);  // same maxsplit as the pattern call
     CUDF_TEST_EXPECT_TABLES_EQUIVALENT(result->view(), expected2->view());
   }
   {
-    auto result = cudf::strings::split_record_re(sv, "\\s", 1);
+    auto pattern = std::string("\\s");
+    auto result  = cudf::strings::split_record_re(sv, pattern, 1);
 
     using LCW = cudf::test::lists_column_wrapper<cudf::string_view>;
     LCW expected1(
       {LCW{"", "Héllo\tthesé"}, LCW{}, LCW{"are", "some  "}, LCW{"tést", "String"}, LCW{""}},
       validity);
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected1);
+    auto prog = cudf::strings::regex_program::create(pattern);
+    result    = cudf::strings::split_record_re(sv, *prog, 1);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected1);
 
-    result = cudf::strings::split_record_re(sv, "\\s", 2);
+    result = cudf::strings::split_record_re(sv, pattern, 2);
     LCW expected2(
       {LCW{"", "Héllo", "thesé"}, LCW{}, LCW{"are", "some", " "}, LCW{"tést", "String"}, LCW{""}},
       validity);
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected2);
+    result = cudf::strings::split_record_re(sv, *prog, 2);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected2);
 
     // split everything is the same output as maxsplit==3 for the test input column here
-    result         = cudf::strings::split_record_re(sv, "\\s", 3);
-    auto expected0 = cudf::strings::split_record_re(sv, "\\s");
+    result         = cudf::strings::split_record_re(sv, pattern, 3);
+    auto expected0 = cudf::strings::split_record_re(sv, pattern);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected0->view());
+    result = cudf::strings::split_record_re(sv, *prog, 3);
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected0->view());
   }
 }
@@ -433,7 +472,8 @@ TEST_F(StringsSplitTest, SplitRegexWordBoundary)
   cudf::test::strings_column_wrapper input({"a", "ab", "-+", "e\né"});
   auto sv = cudf::strings_column_view(input);
   {
-    auto result = cudf::strings::split_re(sv, "\\b");
+    auto pattern = std::string("\\b");
+    auto result  = cudf::strings::split_re(sv, pattern);
 
     cudf::test::strings_column_wrapper col0({"", "", "-+", ""});
     cudf::test::strings_column_wrapper col1({"a", "ab", "", "e"}, {1, 1, 0, 1});
@@ -442,13 +482,20 @@ TEST_F(StringsSplitTest, SplitRegexWordBoundary)
     cudf::test::strings_column_wrapper col4({"", "", "", ""}, {0, 0, 0, 1});
     auto expected = cudf::table_view({col0, col1, col2, col3, col4});
     CUDF_TEST_EXPECT_TABLES_EQUIVALENT(result->view(), expected);
+    auto prog = cudf::strings::regex_program::create(pattern);
+    result    = cudf::strings::split_re(sv, *prog);
+    CUDF_TEST_EXPECT_TABLES_EQUIVALENT(result->view(), expected);
   }
   {
-    auto result = cudf::strings::split_record_re(sv, "\\B");
+    auto pattern = std::string("\\B");
+    auto result  = cudf::strings::split_record_re(sv, pattern);
 
     using LCW = cudf::test::lists_column_wrapper<cudf::string_view>;
     LCW expected({LCW{"a"}, LCW{"a", "b"}, LCW{"", "-", "+", ""}, LCW{"e\né"}});
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected);
+    auto prog = cudf::strings::regex_program::create(pattern);
+    result    = cudf::strings::split_record_re(sv, *prog);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected);
   }
 }
 
@@ -551,26 +598,35 @@ TEST_F(StringsSplitTest, RSplitRegexWithMaxSplit)
   cudf::test::strings_column_wrapper input(h_strings.begin(), h_strings.end(), validity);
   auto sv = cudf::strings_column_view(input);
 
+  auto pattern = std::string("\\s+");
+  auto prog    = cudf::strings::regex_program::create(pattern);
+
   {
-    auto result = cudf::strings::rsplit_re(sv, "\\s+", 1);
+    auto result = cudf::strings::rsplit_re(sv, pattern, 1);
 
     cudf::test::strings_column_wrapper col0({" Héllo", "", "are some", "tést", ""}, validity);
     cudf::test::strings_column_wrapper col1({"thesé", "", "", "String", ""}, {1, 0, 1, 1, 0});
     auto expected = cudf::table_view({col0, col1});
     CUDF_TEST_EXPECT_TABLES_EQUIVALENT(result->view(), expected);
+    result = cudf::strings::rsplit_re(sv, *prog, 1);
+    CUDF_TEST_EXPECT_TABLES_EQUIVALENT(result->view(), expected);
   }
   {
-    auto result = cudf::strings::rsplit_record_re(sv, "\\s+", 1);
+    auto result = cudf::strings::rsplit_record_re(sv, pattern, 1);
 
     using LCW = cudf::test::lists_column_wrapper<cudf::string_view>;
     LCW expected(
       {LCW{" Héllo", "thesé"}, LCW{}, LCW{"are some", ""}, LCW{"tést", "String"}, LCW{""}},
       validity);
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected);
+    result = cudf::strings::rsplit_record_re(sv, *prog, 1);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected);
 
     // split everything is the same output as any maxsplit > 2 for the test input column here
-    result         = cudf::strings::rsplit_record_re(sv, "\\s+", 3);
-    auto expected0 = cudf::strings::rsplit_record_re(sv, "\\s+");
+    result         = cudf::strings::rsplit_record_re(sv, pattern, 3);
+    auto expected0 = cudf::strings::rsplit_record_re(sv, pattern);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected0->view());
+    result = cudf::strings::rsplit_record_re(sv, *prog, 3);
     CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected0->view());
   }
 }

From a2c428c41b1a2c07033a9517dc92b34967f94a3e Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Wed, 9 Nov 2022 13:29:03 -0600
Subject: [PATCH 144/202] Fix an error in IO with `GzipFile` type (#12085)

Fixes: #10590

This PR fixes an issue where the file-like object doesn't have a `size` attribute; in that case, the size of the file is now computed manually.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - https://github.com/brandon-b-miller
  - Vukasin Milovanovic (https://github.com/vuule)

URL: https://github.com/rapidsai/cudf/pull/12085
---
 python/cudf/cudf/tests/test_json.py | 24 ++++++++++++++++++++++++
 python/cudf/cudf/utils/ioutils.py   | 10 +++++++++-
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/tests/test_json.py b/python/cudf/cudf/tests/test_json.py
index 34aff2c34fe..14238be7bc1 100644
--- a/python/cudf/cudf/tests/test_json.py
+++ b/python/cudf/cudf/tests/test_json.py
@@ -1,6 +1,7 @@
 # Copyright (c) 2018-2022, NVIDIA CORPORATION.
 
 import copy
+import gzip
 import itertools
 import os
 from io import BytesIO, StringIO
@@ -943,3 +944,26 @@ def test_order_nested_json_reader(tag, data):
     )
 
     assert_eq(expected, target, check_dtype=True)
+
+
+def test_json_round_trip_gzip():
+    df = cudf.DataFrame({"a": [1, 2, 3], "b": ["abc", "def", "ghi"]})
+    bytes = BytesIO()
+    with gzip.open(bytes, mode="wb") as fo:
+        df.to_json(fo, orient="records", lines=True)
+    bytes.seek(0)
+    with gzip.open(bytes, mode="rb") as fo:
+        written_df = cudf.read_json(fo, orient="records", lines=True)
+    assert_eq(written_df, df)
+
+    # Testing writing from middle of the file.
+    loc = bytes.tell()
+
+    with gzip.open(bytes, mode="wb") as fo:
+        fo.seek(loc)
+        df.to_json(fo, orient="records", lines=True)
+    bytes.seek(loc)
+    with gzip.open(bytes, mode="rb") as fo:
+        fo.seek(loc)
+        written_df = cudf.read_json(fo, orient="records", lines=True)
+    assert_eq(written_df, df)
diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py
index 9146405c6ed..2c4b73666a5 100644
--- a/python/cudf/cudf/utils/ioutils.py
+++ b/python/cudf/cudf/utils/ioutils.py
@@ -1953,7 +1953,15 @@ def _fsspec_data_transfer(
 
     # Calculate total file size
     if file_like:
-        file_size = path_or_fob.size
+        try:
+            file_size = path_or_fob.size
+        except AttributeError:
+            # Find file size if there is no `size`
+            # attribute
+            old_file_position = path_or_fob.tell()
+            path_or_fob.seek(0, os.SEEK_END)
+            file_size = path_or_fob.tell()
+            path_or_fob.seek(old_file_position, os.SEEK_SET)
     file_size = file_size or fs.size(path_or_fob)
 
     # Check if a direct read makes the most sense

From 26d449c81c10ae41ff03ee92f97a4990aec118be Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Wed, 9 Nov 2022 15:33:55 -0600
Subject: [PATCH 145/202] Update Numba docs links. (#12107)

This updates links that point to the Numba documentation to use the new domain.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/12107
---
 docs/cudf/source/user_guide/cupy-interop.ipynb  | 2 +-
 docs/cudf/source/user_guide/guide-to-udfs.ipynb | 8 ++++----
 python/cudf/cudf/core/dataframe.py              | 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/docs/cudf/source/user_guide/cupy-interop.ipynb b/docs/cudf/source/user_guide/cupy-interop.ipynb
index 47c6ba408fb..3e169984ace 100644
--- a/docs/cudf/source/user_guide/cupy-interop.ipynb
+++ b/docs/cudf/source/user_guide/cupy-interop.ipynb
@@ -42,7 +42,7 @@
     "\n",
     "2. We can also use `DataFrame.values`.\n",
     "\n",
-    "3. We can also convert via the [CUDA array interface](https://numba.pydata.org/numba-doc/dev/cuda/cuda_array_interface.html) by using cuDF's `to_cupy` functionality."
+    "3. We can also convert via the [CUDA array interface](https://numba.readthedocs.io/en/stable/cuda/cuda_array_interface.html) by using cuDF's `to_cupy` functionality."
    ]
   },
   {
diff --git a/docs/cudf/source/user_guide/guide-to-udfs.ipynb b/docs/cudf/source/user_guide/guide-to-udfs.ipynb
index f80644251c2..bd7793ac214 100644
--- a/docs/cudf/source/user_guide/guide-to-udfs.ipynb
+++ b/docs/cudf/source/user_guide/guide-to-udfs.ipynb
@@ -446,8 +446,8 @@
    "id": "00914f2a",
    "metadata": {},
    "source": [
-    "In addition to the Series.apply() method for performing custom operations, you can also pass Series objects directly into [CUDA kernels written with Numba](https://numba.pydata.org/numba-doc/latest/cuda/kernels.html).\n",
-    "Note that this section requires basic CUDA knowledge. Refer to [numba's CUDA documentation](https://numba.pydata.org/numba-doc/latest/cuda/index.html) for details.\n",
+    "In addition to the Series.apply() method for performing custom operations, you can also pass Series objects directly into [CUDA kernels written with Numba](https://numba.readthedocs.io/en/stable/cuda/kernels.html).\n",
+    "Note that this section requires basic CUDA knowledge. Refer to [numba's CUDA documentation](https://numba.readthedocs.io/en/stable/cuda/index.html) for details.\n",
     "\n",
     "The easiest way to write a Numba kernel is to use `cuda.grid(1)` to manage thread indices, and then leverage Numba's `forall` method to configure the kernel for us. Below, define a basic multiplication kernel as an example and use `@cuda.jit` to compile it."
    ]
@@ -485,7 +485,7 @@
    "source": [
     "This kernel will take an input array, multiply it by a configurable value (supplied at runtime), and store the result in an output array. Notice that we wrapped our logic in an `if` statement. Because we can launch more threads than the size of our array, we need to make sure that we don't use threads with an index that would be out of bounds. Leaving this out can result in undefined behavior.\n",
     "\n",
-    "To execute our kernel, must pre-allocate an output array and leverage the `forall` method mentioned above. First, we create a Series of all `0.0` in our DataFrame, since we want `float64` output. Next, we run the kernel with `forall`. `forall` requires us to specify our desired number of tasks, so we'll supply in the length of our Series (which we store in `size`). The [__cuda_array_interface__](https://numba.pydata.org/numba-doc/dev/cuda/cuda_array_interface.html) is what allows us to directly call our Numba kernel on our Series."
+    "To execute our kernel, must pre-allocate an output array and leverage the `forall` method mentioned above. First, we create a Series of all `0.0` in our DataFrame, since we want `float64` output. Next, we run the kernel with `forall`. `forall` requires us to specify our desired number of tasks, so we'll supply in the length of our Series (which we store in `size`). The [__cuda_array_interface__](https://numba.readthedocs.io/en/stable/cuda/cuda_array_interface.html) is what allows us to directly call our Numba kernel on our Series."
    ]
   },
   {
@@ -2624,7 +2624,7 @@
     "- Generalized NA UDFs\n",
     "\n",
     "\n",
-    "For more information please see the [cuDF](https://docs.rapids.ai/api/cudf/nightly/), [Numba.cuda](https://numba.pydata.org/numba-doc/dev/cuda/index.html), and [CuPy](https://docs-cupy.chainer.org/en/stable/) documentation."
+    "For more information please see the [cuDF](https://docs.rapids.ai/api/cudf/nightly/), [Numba.cuda](https://numba.readthedocs.io/en/stable/cuda/index.html), and [CuPy](https://docs-cupy.chainer.org/en/stable/) documentation."
    ]
   }
  ],
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 5c24b222a1b..b7ec10fee2c 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -3942,7 +3942,7 @@ def apply(
         ``apply`` relies on Numba to JIT compile ``func``.
         Thus the allowed operations within ``func`` are limited to `those
         supported by the CUDA Python Numba target
-        <https://numba.pydata.org/numba-doc/latest/cuda/cudapysupported.html>`__.
+        <https://numba.readthedocs.io/en/stable/cuda/cudapysupported.html>`__.
         For more information, see the `cuDF guide to user defined functions
         <https://docs.rapids.ai/api/cudf/stable/user_guide/guide-to-udfs.html>`__.
 
@@ -4297,7 +4297,7 @@ def apply_chunks(
         respectively (See `numba CUDA kernel documentation`_).
 
         .. _numba CUDA kernel documentation:\
-        http://numba.pydata.org/numba-doc/latest/cuda/kernels.html
+        https://numba.readthedocs.io/en/stable/cuda/kernels.html
 
         In the example below, the *kernel* is invoked concurrently on each
         specified chunk. The *kernel* computes the corresponding output

From fbac4b4eb3002a21a0c6d9354bb2dffb5c5b6b73 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Wed, 9 Nov 2022 16:24:46 -0600
Subject: [PATCH 146/202] Add `truncate` API to python doc pages (#12109)

In https://github.com/rapidsai/cudf/pull/11435, the `truncate` API was added, but I had a review comment (to add it to the docs) that I forgot to publish. This PR adds `truncate` to the docs pages.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/12109
---
 docs/cudf/source/api_docs/dataframe.rst | 1 +
 docs/cudf/source/api_docs/series.rst    | 1 +
 2 files changed, 2 insertions(+)

diff --git a/docs/cudf/source/api_docs/dataframe.rst b/docs/cudf/source/api_docs/dataframe.rst
index f5c9053ec92..609ef9ea0b9 100644
--- a/docs/cudf/source/api_docs/dataframe.rst
+++ b/docs/cudf/source/api_docs/dataframe.rst
@@ -179,6 +179,7 @@ Reindexing / selection / label manipulation
    DataFrame.tail
    DataFrame.take
    DataFrame.tile
+   DataFrame.truncate
 
 .. _api.dataframe.missing:
 
diff --git a/docs/cudf/source/api_docs/series.rst b/docs/cudf/source/api_docs/series.rst
index 245793e5ea6..c721ed980ea 100644
--- a/docs/cudf/source/api_docs/series.rst
+++ b/docs/cudf/source/api_docs/series.rst
@@ -177,6 +177,7 @@ Reindexing / selection / label manipulation
    Series.take
    Series.tail
    Series.tile
+   Series.truncate
    Series.where
    Series.mask
 

From 6f78e74a90034dbc11783f7b46e27bfd4da6194e Mon Sep 17 00:00:00 2001
From: "Richard (Rick) Zamora" <rzamora217@gmail.com>
Date: Wed, 9 Nov 2022 16:33:58 -0600
Subject: [PATCH 147/202] Expose engine argument in dask_cudf.read_json
 (#12101)

Exposes the `engine` argument in `dask_cudf.read_json`, enabling `dask_cudf.read_json(..., engine="cudf_experimental")` for nested JSON data.

TODO (~maybe this PR?~):

- [ ] (**EDIT**: This should be done in a separate PR) Add simple/optimized code path to leverage the `byte_range` parameter for local storage (similar to what is done in [`dask_cudf.read_csv`](https://github.com/rapidsai/cudf/blob/7535f31cfaf7e01578c413bb3ba46b03d2014806/python/dask_cudf/dask_cudf/io/csv.py#L72)). This would depend on #12017 for nested json data.

Authors:
  - Richard (Rick) Zamora (https://github.com/rjzamora)

Approvers:
  - Lawrence Mitchell (https://github.com/wence-)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/12101
---
 python/dask_cudf/dask_cudf/backends.py        | 11 ++--
 python/dask_cudf/dask_cudf/io/json.py         | 64 ++++++++++++++++++-
 .../dask_cudf/dask_cudf/io/tests/test_json.py | 18 ++++++
 3 files changed, 85 insertions(+), 8 deletions(-)

diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py
index f02c75eb3e8..58f3d807f51 100644
--- a/python/dask_cudf/dask_cudf/backends.py
+++ b/python/dask_cudf/dask_cudf/backends.py
@@ -493,13 +493,10 @@ def read_parquet(*args, engine=None, **kwargs):
             )
 
         @staticmethod
-        def read_json(*args, engine=None, **kwargs):
-            return _default_backend(
-                dd.read_json,
-                *args,
-                engine=cudf.read_json,
-                **kwargs,
-            )
+        def read_json(*args, **kwargs):
+            from dask_cudf.io.json import read_json
+
+            return read_json(*args, **kwargs)
 
         @staticmethod
         def read_orc(*args, **kwargs):
diff --git a/python/dask_cudf/dask_cudf/io/json.py b/python/dask_cudf/dask_cudf/io/json.py
index 6c3c95d1a2e..6ab2ba415a5 100644
--- a/python/dask_cudf/dask_cudf/io/json.py
+++ b/python/dask_cudf/dask_cudf/io/json.py
@@ -6,4 +6,66 @@
 
 import cudf
 
-read_json = partial(dask.dataframe.read_json, engine=cudf.read_json)
+from dask_cudf.backends import _default_backend
+
+
+def read_json(url_path, engine="auto", **kwargs):
+    """Create a dask_cudf DataFrame collection from JSON data
+
+    This function wraps ``dask.dataframe.read_json``, and passes
+    ``engine=partial(cudf.read_json, engine="auto")`` by default.
+
+    Parameters
+    ----------
+    url_path: str, list of str
+        Location to read from. If a string, can include a glob character to
+        find a set of file names.
+        Supports protocol specifications such as ``"s3://"``.
+    engine : str or Callable, default "auto"
+        If str, this value will be used as the ``engine`` argument when
+        ``cudf.read_json`` is used to create each partition. If Callable,
+        this value will be used as the underlying function used to create
+        each partition from JSON data. The default value is "auto", so
+        that ``engine=partial(cudf.read_json, engine="auto")`` will be
+        pased to ``dask.dataframe.read_json`` by default.
+    **kwargs :
+        Key-word arguments to pass through to ``dask.dataframe.read_json``.
+
+    Returns
+    -------
+    dask_cudf.DataFrame
+
+    Examples
+    --------
+    Load single file
+
+    >>> from dask_cudf import read_json
+    >>> read_json('myfile.json')  # doctest: +SKIP
+
+    Load large line-delimited JSON files using partitions of approx
+    256MB size
+
+    >>> read_json('data/file*.csv', blocksize=2**28)  # doctest: +SKIP
+
+    Load nested JSON data
+
+    >>> read_json('myfile.json', engine='cudf_experimental')  # doctest: +SKIP
+
+    See Also
+    --------
+    dask.dataframe.io.json.read_json
+    """
+
+    # TODO: Add optimized code path to leverage the
+    # `byte_range` argument in `cudf.read_json` for
+    # local storage (see `dask_cudf.read_csv`)
+    return _default_backend(
+        dask.dataframe.read_json,
+        url_path,
+        engine=(
+            partial(cudf.read_json, engine=engine)
+            if isinstance(engine, str)
+            else engine
+        ),
+        **kwargs,
+    )
diff --git a/python/dask_cudf/dask_cudf/io/tests/test_json.py b/python/dask_cudf/dask_cudf/io/tests/test_json.py
index d19f7736e8e..9d26bf06545 100644
--- a/python/dask_cudf/dask_cudf/io/tests/test_json.py
+++ b/python/dask_cudf/dask_cudf/io/tests/test_json.py
@@ -71,3 +71,21 @@ def test_read_json_lines(lines):
         actual = dask_cudf.read_json(f, orient="records", lines=lines)
         actual_pd = pd.read_json(f, orient="records", lines=lines)
         dd.assert_eq(actual, actual_pd)
+
+
+def test_read_json_nested_experimental(tmp_path):
+    # Check that `engine="cudf_experimental"` can
+    # be used to support nested data
+    df = pd.DataFrame(
+        {
+            "a": [{"y": 2}, {"y": 4}, {"y": 6}, {"y": 8}],
+            "b": [[1, 2, 3], [4, 5], [6], [7]],
+            "c": [1, 3, 5, 7],
+        }
+    )
+    kwargs = dict(orient="records", lines=True)
+    with tmp_path / "data.json" as f:
+        df.to_json(f, **kwargs)
+        actual = dask_cudf.read_json(f, engine="cudf_experimental", **kwargs)
+        actual_pd = pd.read_json(f, **kwargs)
+        dd.assert_eq(actual, actual_pd)

From 4de279d66bcfc0f48fae4097a62fd1cfcc809503 Mon Sep 17 00:00:00 2001
From: Vukasin Milovanovic <vmilovanovic@nvidia.com>
Date: Wed, 9 Nov 2022 14:40:14 -0800
Subject: [PATCH 148/202] Fix reading of CSV files with blank second row
 (#12098)

There are two options to get the names of columns in a CSV file - header or the first row. In case the first row is used, names are generated, and the only part of the row that is used is the number of detected columns.

This PR fixes the corner case where a blank line after the first (non-header) row causes the reader to detect an additional column (and return an additional column of nulls).
The fix is to break when there is a terminator character within the first row; this only happens with blank row(s) after the first data row. The reader already does this when reading column names from a header; this PR just removes the difference in behavior that was causing the bug.

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/12098
---
 cpp/src/io/csv/reader_impl.cu | 16 +++++++---------
 cpp/tests/io/csv_test.cpp     | 25 +++++++++++++++++++++++++
 2 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/cpp/src/io/csv/reader_impl.cu b/cpp/src/io/csv/reader_impl.cu
index d669dea3115..9da56b9bef8 100644
--- a/cpp/src/io/csv/reader_impl.cu
+++ b/cpp/src/io/csv/reader_impl.cu
@@ -134,7 +134,6 @@ std::vector<std::string> get_column_names(std::vector<char> const& header,
   if (header.size() <= 1) { return col_names; }
 
   std::vector<char> first_row = header;
-  int num_cols                = 0;
 
   bool quotation = false;
   for (size_t pos = 0, prev = 0; pos < first_row.size(); ++pos) {
@@ -163,17 +162,16 @@ std::vector<std::string> get_column_names(std::vector<char> const& header,
 
         const string new_col_name(first_row.data() + prev, col_name_len);
         col_names.push_back(removeQuotes(new_col_name, parse_opts.quotechar));
-
-        // Stop parsing when we hit the line terminator; relevant when there is
-        // a blank line following the header. In this case, first_row includes
-        // multiple line terminators at the end, as the new recStart belongs to
-        // a line that comes after the blank line(s)
-        if (!quotation && first_row[pos] == parse_opts.terminator) { break; }
       } else {
         // This is the first data row, add the automatically generated name
-        col_names.push_back(prefix + std::to_string(num_cols));
+        col_names.push_back(prefix + std::to_string(col_names.size()));
       }
-      num_cols++;
+
+      // Stop parsing when we hit the line terminator; relevant when there is
+      // a blank line following the header. In this case, first_row includes
+      // multiple line terminators at the end, as the new recStart belongs to
+      // a line that comes after the blank line(s)
+      if (!quotation && first_row[pos] == parse_opts.terminator) { break; }
 
       // Skip adjacent delimiters if delim_whitespace is set
       while (parse_opts.multi_delimiter && pos < first_row.size() &&
diff --git a/cpp/tests/io/csv_test.cpp b/cpp/tests/io/csv_test.cpp
index 8100c8e3d7f..8acc6f8f6ee 100644
--- a/cpp/tests/io/csv_test.cpp
+++ b/cpp/tests/io/csv_test.cpp
@@ -2244,4 +2244,29 @@ TEST_F(CsvReaderTest, CsvDefaultOptionsWriteReadMatch)
   EXPECT_EQ(new_table_and_metadata.metadata.column_names[1], "1");
 }
 
+TEST_F(CsvReaderTest, BlankLineAfterFirstRow)
+{
+  std::string csv_in{"12,9., 10\n\n"};
+
+  {
+    cudf::io::csv_reader_options no_header_opts =
+      cudf::io::csv_reader_options::builder(cudf::io::source_info{csv_in.c_str(), csv_in.size()})
+        .header(-1);
+    // No header, getting column names/count from first row
+    auto result = cudf::io::read_csv(no_header_opts);
+
+    const auto result_table = result.tbl->view();
+    ASSERT_EQ(result_table.num_columns(), 3);
+  }
+  {
+    cudf::io::csv_reader_options header_opts =
+      cudf::io::csv_reader_options::builder(cudf::io::source_info{csv_in.c_str(), csv_in.size()});
+    // Getting column names/count from header
+    auto result = cudf::io::read_csv(header_opts);
+
+    const auto result_table = result.tbl->view();
+    ASSERT_EQ(result_table.num_columns(), 3);
+  }
+}
+
 CUDF_TEST_PROGRAM_MAIN()

From 59bd5c31d69a443cc1a9efa366d553142ccbe22f Mon Sep 17 00:00:00 2001
From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Wed, 9 Nov 2022 20:00:57 -0600
Subject: [PATCH 149/202] Support `strip`, `lstrip`, and `rstrip` in
 `strings_udf` (#12091)

This PR adds support for the following three functions in `strings_udf`:

- `str.strip(other)`
- `str.lstrip(other)`
- `str.rstrip(other)`

Part of https://github.com/rapidsai/cudf/issues/9639

Authors:
  - https://github.com/brandon-b-miller
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/12091
---
 python/cudf/cudf/core/udf/__init__.py         | 11 ++-
 python/cudf/cudf/core/udf/strings_lowering.py | 12 +++-
 python/cudf/cudf/core/udf/strings_typing.py   |  9 +++
 python/cudf/cudf/tests/test_udf_masked_ops.py | 27 ++++++++
 .../strings_udf/cpp/src/strings/udf/shim.cu   | 43 ++++++++++++
 python/strings_udf/strings_udf/_typing.py     | 19 +++++-
 python/strings_udf/strings_udf/lowering.py    | 68 ++++++++++++++++---
 .../strings_udf/tests/test_string_udfs.py     | 24 +++++++
 8 files changed, 199 insertions(+), 14 deletions(-)

diff --git a/python/cudf/cudf/core/udf/__init__.py b/python/cudf/cudf/core/udf/__init__.py
index 926d2ea6cbf..8421d763167 100644
--- a/python/cudf/cudf/core/udf/__init__.py
+++ b/python/cudf/cudf/core/udf/__init__.py
@@ -32,7 +32,11 @@
             column_from_udf_string_array,
             column_to_string_view_array,
         )
-        from strings_udf._typing import str_view_arg_handler, string_view
+        from strings_udf._typing import (
+            str_view_arg_handler,
+            string_view,
+            udf_string,
+        )
 
         from . import strings_typing  # isort: skip
         from . import strings_lowering  # isort: skip
@@ -41,7 +45,7 @@
             masked_lowering.masked_constructor
         )
         utils.JIT_SUPPORTED_TYPES |= STRING_TYPES
-        _supported_masked_types |= {string_view}
+        _supported_masked_types |= {string_view, udf_string}
 
         utils.launch_arg_getters[cudf_str_dtype] = column_to_string_view_array
         utils.output_col_getters[cudf_str_dtype] = column_from_udf_string_array
@@ -49,6 +53,9 @@
         row_function.itemsizes[cudf_str_dtype] = string_view.size_bytes
 
         utils.arg_handlers.append(str_view_arg_handler)
+
+        masked_typing.MASKED_INIT_MAP[udf_string] = udf_string
+
         _STRING_UDFS_ENABLED = True
 
 except ImportError as e:
diff --git a/python/cudf/cudf/core/udf/strings_lowering.py b/python/cudf/cudf/core/udf/strings_lowering.py
index 59041977f87..fdfd013bad7 100644
--- a/python/cudf/cudf/core/udf/strings_lowering.py
+++ b/python/cudf/cudf/core/udf/strings_lowering.py
@@ -7,7 +7,7 @@
 from numba.core.typing import signature as nb_signature
 from numba.cuda.cudaimpl import lower as cuda_lower
 
-from strings_udf._typing import size_type, string_view
+from strings_udf._typing import size_type, string_view, udf_string
 from strings_udf.lowering import (
     contains_impl,
     count_impl,
@@ -22,8 +22,11 @@
     istitle_impl,
     isupper_impl,
     len_impl,
+    lstrip_impl,
     rfind_impl,
+    rstrip_impl,
     startswith_impl,
+    strip_impl,
 )
 
 from cudf.core.udf.masked_typing import MaskedType
@@ -79,6 +82,13 @@ def masked_binary_func_impl(context, builder, sig, args):
     )
 
 
+create_binary_string_func("MaskedType.strip", strip_impl, udf_string)
+
+create_binary_string_func("MaskedType.lstrip", lstrip_impl, udf_string)
+
+create_binary_string_func("MaskedType.rstrip", rstrip_impl, udf_string)
+
+
 create_binary_string_func(
     "MaskedType.startswith",
     startswith_impl,
diff --git a/python/cudf/cudf/core/udf/strings_typing.py b/python/cudf/cudf/core/udf/strings_typing.py
index 1179688651f..f8f50600b12 100644
--- a/python/cudf/cudf/core/udf/strings_typing.py
+++ b/python/cudf/cudf/core/udf/strings_typing.py
@@ -13,7 +13,9 @@
     id_unary_funcs,
     int_binary_funcs,
     size_type,
+    string_return_attrs,
     string_view,
+    udf_string,
 )
 
 from cudf.core.udf import masked_typing
@@ -172,6 +174,13 @@ def resolve_valid(self, mod):
         create_masked_binary_attr(f"MaskedType.{func}", size_type),
     )
 
+for func in string_return_attrs:
+    setattr(
+        MaskedStringViewAttrs,
+        f"resolve_{func}",
+        create_masked_binary_attr(f"MaskedType.{func}", udf_string),
+    )
+
 for func in id_unary_funcs:
     setattr(
         MaskedStringViewAttrs,
diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py
index b4c7cef3a4c..7af47f981d6 100644
--- a/python/cudf/cudf/tests/test_udf_masked_ops.py
+++ b/python/cudf/cudf/tests/test_udf_masked_ops.py
@@ -876,6 +876,33 @@ def func(row):
     run_masked_udf_test(func, str_udf_data, check_dtype=False)
 
 
+@string_udf_test
+@pytest.mark.parametrize("strip_char", ["1", "a", "12", " ", "", ".", "@"])
+def test_string_udf_strip(str_udf_data, strip_char):
+    def func(row):
+        return row["str_col"].strip(strip_char)
+
+    run_masked_udf_test(func, str_udf_data, check_dtype=False)
+
+
+@string_udf_test
+@pytest.mark.parametrize("strip_char", ["1", "a", "12", " ", "", ".", "@"])
+def test_string_udf_lstrip(str_udf_data, strip_char):
+    def func(row):
+        return row["str_col"].lstrip(strip_char)
+
+    run_masked_udf_test(func, str_udf_data, check_dtype=False)
+
+
+@string_udf_test
+@pytest.mark.parametrize("strip_char", ["1", "a", "12", " ", "", ".", "@"])
+def test_string_udf_rstrip(str_udf_data, strip_char):
+    def func(row):
+        return row["str_col"].rstrip(strip_char)
+
+    run_masked_udf_test(func, str_udf_data, check_dtype=False)
+
+
 @pytest.mark.parametrize(
     "data", [[1.0, 0.0, 1.5], [1, 0, 2], [True, False, True]]
 )
diff --git a/python/strings_udf/cpp/src/strings/udf/shim.cu b/python/strings_udf/cpp/src/strings/udf/shim.cu
index b284d58fe58..63e740c5226 100644
--- a/python/strings_udf/cpp/src/strings/udf/shim.cu
+++ b/python/strings_udf/cpp/src/strings/udf/shim.cu
@@ -17,6 +17,7 @@
 #include <cudf/strings/udf/char_types.cuh>
 #include <cudf/strings/udf/search.cuh>
 #include <cudf/strings/udf/starts_with.cuh>
+#include <cudf/strings/udf/strip.cuh>
 #include <cudf/strings/udf/udf_string.cuh>
 
 using namespace cudf::strings::udf;
@@ -227,3 +228,45 @@ extern "C" __device__ int udf_string_from_string_view(int* nb_retbal,
 
   return 0;
 }
+
+extern "C" __device__ int strip(int* nb_retval,
+                                void* udf_str,
+                                void* const* to_strip,
+                                void* const* strip_str)
+{
+  auto to_strip_ptr  = reinterpret_cast<cudf::string_view const*>(to_strip);
+  auto strip_str_ptr = reinterpret_cast<cudf::string_view const*>(strip_str);
+  auto udf_str_ptr   = reinterpret_cast<udf_string*>(udf_str);
+
+  *udf_str_ptr = strip(*to_strip_ptr, *strip_str_ptr);
+
+  return 0;
+}
+
+extern "C" __device__ int lstrip(int* nb_retval,
+                                 void* udf_str,
+                                 void* const* to_strip,
+                                 void* const* strip_str)
+{
+  auto to_strip_ptr  = reinterpret_cast<cudf::string_view const*>(to_strip);
+  auto strip_str_ptr = reinterpret_cast<cudf::string_view const*>(strip_str);
+  auto udf_str_ptr   = reinterpret_cast<udf_string*>(udf_str);
+
+  *udf_str_ptr = strip(*to_strip_ptr, *strip_str_ptr, cudf::strings::side_type::LEFT);
+
+  return 0;
+}
+
+extern "C" __device__ int rstrip(int* nb_retval,
+                                 void* udf_str,
+                                 void* const* to_strip,
+                                 void* const* strip_str)
+{
+  auto to_strip_ptr  = reinterpret_cast<cudf::string_view const*>(to_strip);
+  auto strip_str_ptr = reinterpret_cast<cudf::string_view const*>(strip_str);
+  auto udf_str_ptr   = reinterpret_cast<udf_string*>(udf_str);
+
+  *udf_str_ptr = strip(*to_strip_ptr, *strip_str_ptr, cudf::strings::side_type::RIGHT);
+
+  return 0;
+}
diff --git a/python/strings_udf/strings_udf/_typing.py b/python/strings_udf/strings_udf/_typing.py
index 320958960cd..a309a9cb93c 100644
--- a/python/strings_udf/strings_udf/_typing.py
+++ b/python/strings_udf/strings_udf/_typing.py
@@ -181,7 +181,7 @@ def attr(self, mod):
     return attr
 
 
-def create_identifier_attr(attrname):
+def create_identifier_attr(attrname, retty):
     """
     Helper function wrapping numba's low level extension API. Provides
     the boilerplate needed to register a unary function of a string
@@ -192,7 +192,7 @@ class StringViewIdentifierAttr(AbstractTemplate):
         key = f"StringView.{attrname}"
 
         def generic(self, args, kws):
-            return nb_signature(types.boolean, recvr=self.this)
+            return nb_signature(retty, recvr=self.this)
 
     def attr(self, mod):
         return types.BoundFunction(StringViewIdentifierAttr, string_view)
@@ -229,6 +229,7 @@ def resolve_count(self, mod):
     "isnumeric",
     "istitle",
 ]
+string_return_attrs = ["strip", "lstrip", "rstrip"]
 
 for func in bool_binary_funcs:
     setattr(
@@ -237,12 +238,24 @@ def resolve_count(self, mod):
         create_binary_attr(func, types.boolean),
     )
 
+for func in string_return_attrs:
+    setattr(
+        StringViewAttrs,
+        f"resolve_{func}",
+        create_binary_attr(func, udf_string),
+    )
+
+
 for func in int_binary_funcs:
     setattr(
         StringViewAttrs, f"resolve_{func}", create_binary_attr(func, size_type)
     )
 
 for func in id_unary_funcs:
-    setattr(StringViewAttrs, f"resolve_{func}", create_identifier_attr(func))
+    setattr(
+        StringViewAttrs,
+        f"resolve_{func}",
+        create_identifier_attr(func, types.boolean),
+    )
 
 cuda_decl_registry.register_attr(StringViewAttrs)
diff --git a/python/strings_udf/strings_udf/lowering.py b/python/strings_udf/strings_udf/lowering.py
index 909b0e56187..17a1869e881 100644
--- a/python/strings_udf/strings_udf/lowering.py
+++ b/python/strings_udf/strings_udf/lowering.py
@@ -19,6 +19,7 @@
 character_flags_table_ptr = get_character_flags_table_ptr()
 
 _STR_VIEW_PTR = types.CPointer(string_view)
+_UDF_STRING_PTR = types.CPointer(udf_string)
 
 
 # CUDA function declarations
@@ -34,6 +35,12 @@ def _declare_binary_func(lhs, rhs, out, name):
     )
 
 
+def _declare_strip_func(name):
+    return cuda.declare_device(
+        name, size_type(_UDF_STRING_PTR, _STR_VIEW_PTR, _STR_VIEW_PTR)
+    )
+
+
 # A binary function of the form f(string, string) -> bool
 _declare_bool_str_str_func = partial(
     _declare_binary_func, _STR_VIEW_PTR, _STR_VIEW_PTR, types.boolean
@@ -55,6 +62,9 @@ def _declare_binary_func(lhs, rhs, out, name):
 _string_view_find = _declare_size_type_str_str_func("find")
 _string_view_rfind = _declare_size_type_str_str_func("rfind")
 _string_view_contains = _declare_bool_str_str_func("contains")
+_string_view_strip = _declare_strip_func("strip")
+_string_view_lstrip = _declare_strip_func("lstrip")
+_string_view_rstrip = _declare_strip_func("rstrip")
 
 
 # A binary function of the form f(string, int) -> bool
@@ -162,17 +172,44 @@ def deco(cuda_func):
         def binary_func_impl(context, builder, sig, args):
             lhs_ptr = builder.alloca(args[0].type)
             rhs_ptr = builder.alloca(args[1].type)
-
             builder.store(args[0], lhs_ptr)
             builder.store(args[1], rhs_ptr)
-            result = context.compile_internal(
-                builder,
-                cuda_func,
-                nb_signature(retty, _STR_VIEW_PTR, _STR_VIEW_PTR),
-                (lhs_ptr, rhs_ptr),
-            )
 
-            return result
+            # these conditional statements should compile out
+            if retty != udf_string:
+                # binary function of two strings yielding a fixed-width type
+                # example: str.startswith(other) -> bool
+                # shim functions can return the value through nb_retval
+                result = context.compile_internal(
+                    builder,
+                    cuda_func,
+                    nb_signature(retty, _STR_VIEW_PTR, _STR_VIEW_PTR),
+                    (lhs_ptr, rhs_ptr),
+                )
+                return result
+            else:
+                # binary function of two strings yielding a new string
+                # example: str.strip(other) -> str
+                # shim functions can not return a struct due to C linkage
+                # so we create a new udf_string and pass a pointer to it
+                # for the shim function to write the output to. The return
+                # value of compile_internal is therefore discarded (although
+                # this may change in the future if we need to return error
+                # codes, for instance).
+                udf_str_ptr = builder.alloca(
+                    default_manager[udf_string].get_value_type()
+                )
+
+                _ = context.compile_internal(
+                    builder,
+                    cuda_func,
+                    size_type(_UDF_STRING_PTR, _STR_VIEW_PTR, _STR_VIEW_PTR),
+                    (udf_str_ptr, lhs_ptr, rhs_ptr),
+                )
+                result = cgutils.create_struct_proxy(udf_string)(
+                    context, builder, value=builder.load(udf_str_ptr)
+                )
+                return result._getvalue()
 
         return binary_func_impl
 
@@ -214,6 +251,21 @@ def lt_impl(st, rhs):
     return _string_view_lt(st, rhs)
 
 
+@create_binary_string_func("StringView.strip", udf_string)
+def strip_impl(result, to_strip, strip_char):
+    return _string_view_strip(result, to_strip, strip_char)
+
+
+@create_binary_string_func("StringView.lstrip", udf_string)
+def lstrip_impl(result, to_strip, strip_char):
+    return _string_view_lstrip(result, to_strip, strip_char)
+
+
+@create_binary_string_func("StringView.rstrip", udf_string)
+def rstrip_impl(result, to_strip, strip_char):
+    return _string_view_rstrip(result, to_strip, strip_char)
+
+
 @create_binary_string_func("StringView.startswith", types.boolean)
 def startswith_impl(sv, substr):
     return _string_view_startswith(sv, substr)
diff --git a/python/strings_udf/strings_udf/tests/test_string_udfs.py b/python/strings_udf/strings_udf/tests/test_string_udfs.py
index ca3fbda4eb1..522433d404f 100644
--- a/python/strings_udf/strings_udf/tests/test_string_udfs.py
+++ b/python/strings_udf/strings_udf/tests/test_string_udfs.py
@@ -278,3 +278,27 @@ def func(st):
         return st
 
     run_udf_test(data, func, "str")
+
+
+@pytest.mark.parametrize("strip_char", ["1", "a", "12", " ", "", ".", "@"])
+def test_string_udf_strip(data, strip_char):
+    def func(st):
+        return st.strip(strip_char)
+
+    run_udf_test(data, func, "str")
+
+
+@pytest.mark.parametrize("strip_char", ["1", "a", "12", " ", "", ".", "@"])
+def test_string_udf_lstrip(data, strip_char):
+    def func(st):
+        return st.lstrip(strip_char)
+
+    run_udf_test(data, func, "str")
+
+
+@pytest.mark.parametrize("strip_char", ["1", "a", "12", " ", "", ".", "@"])
+def test_string_udf_rstrip(data, strip_char):
+    def func(st):
+        return st.rstrip(strip_char)
+
+    run_udf_test(data, func, "str")

From 4497ed6bcd2347ba7a77f4b8d9e7f867c10e1d42 Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Thu, 10 Nov 2022 05:38:05 -0500
Subject: [PATCH 150/202] Workaround groupby aggregate thrust::copy_if overflow
 (#12079)

Workaround for a limitation in `thrust::copy_if`, which fails if the input iterator range spans more than int-max elements.
`thrust::copy_if` hardcodes the iterator distance type to be an int:
https://github.com/NVIDIA/thrust/blob/dbd144ed543b60c4ff9d456edd19869e82fe8873/thrust/system/cuda/detail/copy_if.h#L699-L708

Found existing thrust issue: https://github.com/NVIDIA/thrust/issues/1302

This change calls `copy_if` in chunks when the input range can span more than int-max elements.

Closes #12058

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Alessandro Bellina (https://github.com/abellina)
  - Robert Maynard (https://github.com/robertmaynard)
  - Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/12079
---
 cpp/src/groupby/hash/groupby.cu | 37 +++++++++++++++++++++++----------
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu
index c07833520ab..90c869b8c58 100644
--- a/cpp/src/groupby/hash/groupby.cu
+++ b/cpp/src/groupby/hash/groupby.cu
@@ -512,18 +512,33 @@ rmm::device_uvector<size_type> extract_populated_keys(map_type const& map,
 {
   rmm::device_uvector<size_type> populated_keys(num_keys, stream);
 
-  auto get_key    = [] __device__(auto const& element) { return element.first; };  // first = key
-  auto get_key_it = thrust::make_transform_iterator(map.data(), get_key);
-  auto key_used   = [unused = map.get_unused_key()] __device__(auto key) { return key != unused; };
-
-  auto end_it = thrust::copy_if(rmm::exec_policy(stream),
-                                get_key_it,
-                                get_key_it + map.capacity(),
-                                populated_keys.begin(),
-                                key_used);
-
-  populated_keys.resize(std::distance(populated_keys.begin(), end_it), stream);
+  auto const get_key = [] __device__(auto const& element) { return element.first; };  // first = key
+  auto const key_used = [unused = map.get_unused_key()] __device__(auto key) {
+    return key != unused;
+  };
+  auto key_itr = thrust::make_transform_iterator(map.data(), get_key);
+
+  // thrust::copy_if has a bug where it cannot iterate over int-max values
+  // so if map.capacity() > int-max we'll call thrust::copy_if in chunks instead
+  auto const copy_size =
+    std::min(map.capacity(), static_cast<std::size_t>(std::numeric_limits<int>::max()));
+  auto const key_end = key_itr + map.capacity();
+  auto pop_keys_itr  = populated_keys.begin();
+
+  std::size_t output_size = 0;
+  while (key_itr != key_end) {
+    auto const copy_end = static_cast<std::size_t>(std::distance(key_itr, key_end)) <= copy_size
+                            ? key_end
+                            : key_itr + copy_size;
+    auto const end_it =
+      thrust::copy_if(rmm::exec_policy(stream), key_itr, copy_end, pop_keys_itr, key_used);
+    auto const copied = std::distance(pop_keys_itr, end_it);
+    pop_keys_itr += copied;
+    output_size += copied;
+    key_itr = copy_end;
+  }
 
+  populated_keys.resize(output_size, stream);
   return populated_keys;
 }
 

From 8ca2bd911a82052cb47e3fcfc77eb89a6f8f495d Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Thu, 10 Nov 2022 10:04:06 -0600
Subject: [PATCH 151/202] First pass of `pd.read_orc` changes in tests (#12103)

This PR changes calls going via `pyarrow` and then `to_pandas` to directly call `pd.read_orc`. However, since `pd.read_orc` was added in pandas 1.0, we will need to version the call to this constructor. This PR does that.

Partially contributes to #11540

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Vukasin Milovanovic (https://github.com/vuule)
  - Lawrence Mitchell (https://github.com/wence-)

URL: https://github.com/rapidsai/cudf/pull/12103
---
 python/cudf/cudf/core/_compat.py    |   1 -
 python/cudf/cudf/tests/test_gcs.py  |   4 +-
 python/cudf/cudf/tests/test_hdfs.py |   5 +-
 python/cudf/cudf/tests/test_orc.py  | 168 +++++++---------------------
 python/cudf/cudf/tests/test_s3.py   |   8 +-
 5 files changed, 45 insertions(+), 141 deletions(-)

diff --git a/python/cudf/cudf/core/_compat.py b/python/cudf/cudf/core/_compat.py
index 5534d732f53..3889fcc4cc0 100644
--- a/python/cudf/cudf/core/_compat.py
+++ b/python/cudf/cudf/core/_compat.py
@@ -4,7 +4,6 @@
 from packaging import version
 
 PANDAS_VERSION = version.parse(pd.__version__)
-PANDAS_GE_100 = PANDAS_VERSION >= version.parse("1.0")
 PANDAS_GE_110 = PANDAS_VERSION >= version.parse("1.1")
 PANDAS_GE_120 = PANDAS_VERSION >= version.parse("1.2")
 PANDAS_LE_122 = PANDAS_VERSION <= version.parse("1.2.2")
diff --git a/python/cudf/cudf/tests/test_gcs.py b/python/cudf/cudf/tests/test_gcs.py
index f15d705c4e2..a677ace18ec 100644
--- a/python/cudf/cudf/tests/test_gcs.py
+++ b/python/cudf/cudf/tests/test_gcs.py
@@ -5,8 +5,6 @@
 
 import numpy as np
 import pandas as pd
-import pyarrow as pa
-import pyarrow.orc
 import pytest
 
 import cudf
@@ -71,5 +69,5 @@ def mock_open(*args, **kwargs):
     monkeypatch.setattr(gcsfs.core.GCSFileSystem, "open", mock_open)
     gdf.to_orc(f"gcs://{gcs_fname}")
 
-    got = pa.orc.ORCFile(local_filepath).read().to_pandas()
+    got = pd.read_orc(local_filepath)
     assert_eq(pdf, got)
diff --git a/python/cudf/cudf/tests/test_hdfs.py b/python/cudf/cudf/tests/test_hdfs.py
index 8730cb187b5..f8de16f8609 100644
--- a/python/cudf/cudf/tests/test_hdfs.py
+++ b/python/cudf/cudf/tests/test_hdfs.py
@@ -8,7 +8,6 @@
 import pandas as pd
 import pyarrow as pa
 import pytest
-from pyarrow import orc
 
 import cudf
 from cudf.testing._utils import assert_eq
@@ -212,7 +211,7 @@ def test_read_orc(datadir, hdfs, test_url):
         hd_fpath = f"hdfs://{basedir}/file.orc"
 
     got = cudf.read_orc(hd_fpath)
-    expect = orc.ORCFile(buffer).read().to_pandas()
+    expect = pd.read_orc(buffer)
     assert_eq(expect, got)
 
 
@@ -232,7 +231,7 @@ def test_write_orc(pdf, hdfs, test_url):
 
     assert hdfs.exists(f"{basedir}/test_orc_writer.orc")
     with hdfs.open(f"{basedir}/test_orc_writer.orc", mode="rb") as f:
-        got = orc.ORCFile(f).read().to_pandas()
+        got = pd.read_orc(f)
 
     assert_eq(pdf, got)
 
diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py
index fbd9b83330e..1699c11617a 100644
--- a/python/cudf/cudf/tests/test_orc.py
+++ b/python/cudf/cudf/tests/test_orc.py
@@ -84,12 +84,8 @@ def _make_path_or_buf(src):
 )
 def test_orc_reader_basic(datadir, inputfile, columns, use_index, engine):
     path = datadir / inputfile
-    try:
-        orcfile = pa.orc.ORCFile(path)
-    except pa.ArrowIOError as e:
-        pytest.skip(".orc file is not found: %s" % e)
 
-    expect = orcfile.read(columns=columns).to_pandas()
+    expect = pd.read_orc(path, columns=columns)
     got = cudf.read_orc(
         path, engine=engine, columns=columns, use_index=use_index
     )
@@ -119,8 +115,7 @@ def test_orc_reader_local_filepath():
 def test_orc_reader_filepath_or_buffer(path_or_buf, src):
     cols = ["int1", "long1", "float1", "double1"]
 
-    orcfile = pa.orc.ORCFile(path_or_buf("filepath"))
-    expect = orcfile.read(columns=cols).to_pandas()
+    expect = pd.read_orc(path_or_buf("filepath"), columns=cols)
     got = cudf.read_orc(path_or_buf(src), columns=cols)
 
     assert_eq(expect, got)
@@ -128,12 +123,8 @@ def test_orc_reader_filepath_or_buffer(path_or_buf, src):
 
 def test_orc_reader_trailing_nulls(datadir):
     path = datadir / "TestOrcFile.nulls-at-end-snappy.orc"
-    try:
-        orcfile = pa.orc.ORCFile(path)
-    except pa.ArrowIOError as e:
-        pytest.skip(".orc file is not found: %s" % e)
 
-    expect = orcfile.read().to_pandas().fillna(0)
+    expect = pd.read_orc(path).fillna(0)
     got = cudf.read_orc(path).fillna(0)
 
     # PANDAS uses NaN to represent invalid data, which forces float dtype
@@ -164,12 +155,8 @@ def test_orc_reader_datetimestamp(datadir, inputfile, use_index):
 
 def test_orc_reader_strings(datadir):
     path = datadir / "TestOrcFile.testStringAndBinaryStatistics.orc"
-    try:
-        orcfile = pa.orc.ORCFile(path)
-    except pa.ArrowIOError as e:
-        pytest.skip(".orc file is not found: %s" % e)
 
-    expect = orcfile.read(columns=["string1"])
+    expect = pd.read_orc(path, columns=["string1"])
     got = cudf.read_orc(path, columns=["string1"])
 
     assert_eq(expect, got, check_categorical=False)
@@ -285,12 +272,8 @@ def test_orc_read_stripes(datadir, engine):
 @pytest.mark.parametrize("skiprows", [0, 1, 3000])
 def test_orc_read_rows(datadir, skiprows, num_rows):
     path = datadir / "TestOrcFile.decimal.orc"
-    try:
-        orcfile = pa.orc.ORCFile(path)
-    except pa.ArrowIOError as e:
-        pytest.skip(".orc file is not found: %s" % e)
 
-    pdf = orcfile.read().to_pandas()
+    pdf = pd.read_orc(path)
     gdf = cudf.read_orc(path, skiprows=skiprows, num_rows=num_rows)
 
     # Slice rows out of the whole dataframe for comparison as PyArrow doesn't
@@ -329,19 +312,17 @@ def test_orc_read_skiprows():
     # repro for other sizes of data
     skiprows = 10
 
-    expected = cudf.read_orc(buff)[skiprows:].reset_index(drop=True)
+    expected = (
+        pd.read_orc(buff)[skiprows:].reset_index(drop=True).astype("bool")
+    )
     got = cudf.read_orc(buff, skiprows=skiprows)
     assert_eq(expected, got)
 
 
 def test_orc_reader_uncompressed_block(datadir):
     path = datadir / "uncompressed_snappy.orc"
-    try:
-        orcfile = pa.orc.ORCFile(path)
-    except pa.ArrowIOError as e:
-        pytest.skip(".orc file is not found: %s" % e)
 
-    expect = orcfile.read().to_pandas()
+    expect = pd.read_orc(path)
     got = cudf.read_orc(path)
 
     assert_eq(expect, got, check_categorical=False)
@@ -349,15 +330,8 @@ def test_orc_reader_uncompressed_block(datadir):
 
 def test_orc_reader_nodata_block(datadir):
     path = datadir / "nodata.orc"
-    try:
-        orcfile = pa.orc.ORCFile(path)
-    except Exception as excpr:
-        if type(excpr).__name__ == "ArrowIOError":
-            pytest.skip(".orc file is not found")
-        else:
-            print(type(excpr).__name__)
 
-    expect = orcfile.read().to_pandas()
+    expect = pd.read_orc(path)
     got = cudf.read_orc(path, num_rows=1)
 
     assert_eq(expect, got, check_categorical=False)
@@ -386,19 +360,9 @@ def test_orc_writer(datadir, tmpdir, reference_file, columns, compression):
     pdf_fname = datadir / reference_file
     gdf_fname = tmpdir.join("gdf.orc")
 
-    try:
-        orcfile = pa.orc.ORCFile(pdf_fname)
-    except Exception as excpr:
-        if type(excpr).__name__ == "ArrowIOError":
-            pytest.skip(".orc file is not found")
-        else:
-            print(type(excpr).__name__)
-
-    expect = cudf.from_pandas(orcfile.read(columns=columns).to_pandas())
+    expect = cudf.from_pandas(pd.read_orc(pdf_fname, columns=columns))
     expect.to_orc(gdf_fname.strpath, compression=compression)
-    got = cudf.from_pandas(
-        pa.orc.ORCFile(gdf_fname).read(columns=columns).to_pandas()
-    )
+    got = cudf.from_pandas(pd.read_orc(gdf_fname, columns=columns))
 
     assert_frame_equal(expect, got)
 
@@ -409,17 +373,9 @@ def test_orc_writer_statistics_frequency(datadir, tmpdir, stats_freq):
     pdf_fname = datadir / reference_file
     gdf_fname = tmpdir.join("gdf.orc")
 
-    try:
-        orcfile = pa.orc.ORCFile(pdf_fname)
-    except Exception as excpr:
-        if type(excpr).__name__ == "ArrowIOError":
-            pytest.skip(".orc file is not found")
-        else:
-            print(type(excpr).__name__)
-
-    expect = cudf.from_pandas(orcfile.read().to_pandas())
+    expect = cudf.from_pandas(pd.read_orc(pdf_fname))
     expect.to_orc(gdf_fname.strpath, statistics=stats_freq)
-    got = cudf.from_pandas(pa.orc.ORCFile(gdf_fname).read().to_pandas())
+    got = cudf.from_pandas(pd.read_orc(gdf_fname))
 
     assert_frame_equal(expect, got)
 
@@ -430,14 +386,6 @@ def test_chunked_orc_writer_statistics_frequency(datadir, tmpdir, stats_freq):
     pdf_fname = datadir / reference_file
     gdf_fname = tmpdir.join("chunked_gdf.orc")
 
-    try:
-        orcfile = pa.orc.ORCFile(pdf_fname)
-    except Exception as excpr:
-        if type(excpr).__name__ == "ArrowIOError":
-            pytest.skip(".orc file is not found")
-        else:
-            print(type(excpr).__name__)
-
     columns = [
         "boolean1",
         "byte1",
@@ -447,7 +395,7 @@ def test_chunked_orc_writer_statistics_frequency(datadir, tmpdir, stats_freq):
         "float1",
         "double1",
     ]
-    pdf = orcfile.read(columns=columns).to_pandas()
+    pdf = pd.read_orc(pdf_fname, columns=columns)
     gdf = cudf.from_pandas(pdf)
     expect = pd.concat([pdf, pdf]).reset_index(drop=True)
 
@@ -456,7 +404,7 @@ def test_chunked_orc_writer_statistics_frequency(datadir, tmpdir, stats_freq):
     writer.write_table(gdf)
     writer.close()
 
-    got = pa.orc.ORCFile(gdf_fname).read().to_pandas()
+    got = pd.read_orc(gdf_fname)
 
     assert_eq(expect, got)
 
@@ -486,15 +434,7 @@ def test_chunked_orc_writer(
     pdf_fname = datadir / reference_file
     gdf_fname = tmpdir.join("chunked_gdf.orc")
 
-    try:
-        orcfile = pa.orc.ORCFile(pdf_fname)
-    except Exception as excpr:
-        if type(excpr).__name__ == "ArrowIOError":
-            pytest.skip(".orc file is not found")
-        else:
-            print(type(excpr).__name__)
-
-    pdf = orcfile.read(columns=columns).to_pandas()
+    pdf = pd.read_orc(pdf_fname, columns=columns)
     gdf = cudf.from_pandas(pdf)
     expect = pd.concat([pdf, pdf]).reset_index(drop=True)
 
@@ -503,7 +443,7 @@ def test_chunked_orc_writer(
     writer.write_table(gdf)
     writer.close()
 
-    got = pa.orc.ORCFile(gdf_fname).read(columns=columns).to_pandas()
+    got = pd.read_orc(gdf_fname, columns=columns)
     assert_frame_equal(cudf.from_pandas(expect), cudf.from_pandas(got))
 
 
@@ -521,7 +461,7 @@ def test_orc_writer_strings(tmpdir, dtypes):
 
     expect = cudf.datasets.randomdata(nrows=10, dtypes=dtypes, seed=1)
     expect.to_orc(gdf_fname)
-    got = pa.orc.ORCFile(gdf_fname).read().to_pandas()
+    got = pd.read_orc(gdf_fname)
 
     assert_eq(expect, got)
 
@@ -546,7 +486,7 @@ def test_chunked_orc_writer_strings(tmpdir, dtypes):
     writer.write_table(gdf)
     writer.close()
 
-    got = pa.orc.ORCFile(gdf_fname).read().to_pandas()
+    got = pd.read_orc(gdf_fname)
 
     assert_eq(expect, got)
 
@@ -577,13 +517,8 @@ def test_orc_writer_sliced(tmpdir):
 def test_orc_reader_decimal_type(datadir, orc_file):
     file_path = datadir / orc_file
 
-    try:
-        orcfile = pa.orc.ORCFile(file_path)
-    except pa.ArrowIOError as e:
-        pytest.skip(".orc file is not found: %s" % e)
-
-    pdf = orcfile.read().to_pandas()
-    df = cudf.read_orc(file_path).to_pandas()
+    pdf = pd.read_orc(file_path)
+    df = cudf.read_orc(file_path)
 
     assert_eq(pdf, df)
 
@@ -591,13 +526,8 @@ def test_orc_reader_decimal_type(datadir, orc_file):
 def test_orc_decimal_precision_fail(datadir):
     file_path = datadir / "TestOrcFile.int_decimal.precision_19.orc"
 
-    try:
-        orcfile = pa.orc.ORCFile(file_path)
-    except pa.ArrowIOError as e:
-        pytest.skip(".orc file is not found: %s" % e)
-
     # Shouldn't cause failure if decimal column is not chosen to be read.
-    pdf = orcfile.read(columns=["int"]).to_pandas()
+    pdf = pd.read_orc(file_path, columns=["int"])
     gdf = cudf.read_orc(file_path, columns=["int"])
 
     assert_eq(pdf, gdf)
@@ -624,13 +554,9 @@ def test_orc_reader_tzif_timestamps(datadir):
     # Contains timstamps in the range covered by the TZif file
     # Other timedate tests only cover "future" times
     path = datadir / "TestOrcFile.lima_timezone.orc"
-    try:
-        orcfile = pa.orc.ORCFile(path)
-    except pa.ArrowIOError as e:
-        pytest.skip(".orc file is not found: %s" % e)
 
-    pdf = orcfile.read().to_pandas()
-    gdf = cudf.read_orc(path).to_pandas()
+    pdf = pd.read_orc(path)
+    gdf = cudf.read_orc(path)
 
     assert_eq(pdf, gdf)
 
@@ -882,13 +808,9 @@ def test_orc_write_bool_statistics(tmpdir, datadir, nrows):
 
 def test_orc_reader_gmt_timestamps(datadir):
     path = datadir / "TestOrcFile.gmt.orc"
-    try:
-        orcfile = pa.orc.ORCFile(path)
-    except pa.ArrowIOError as e:
-        pytest.skip(".orc file is not found: %s" % e)
 
-    pdf = orcfile.read().to_pandas()
-    gdf = cudf.read_orc(path).to_pandas()
+    pdf = pd.read_orc(path)
+    gdf = cudf.read_orc(path)
     assert_eq(pdf, gdf)
 
 
@@ -914,7 +836,7 @@ def test_orc_bool_encode_fail():
     okay_df.to_orc(buffer)
 
     # Also validate data
-    pdf = pa.orc.ORCFile(buffer).read().to_pandas()
+    pdf = pd.read_orc(buffer)
 
     assert_eq(okay_df.to_pandas(nullable=True), pdf)
 
@@ -929,8 +851,8 @@ def test_nanoseconds_overflow():
     cudf_got = cudf.read_orc(buffer)
     assert_eq(expected, cudf_got)
 
-    pyarrow_got = pa.orc.ORCFile(buffer).read()
-    assert_eq(expected.to_pandas(), pyarrow_got.to_pandas())
+    pandas_got = pd.read_orc(buffer)
+    assert_eq(expected, pandas_got)
 
 
 def test_empty_dataframe():
@@ -1207,7 +1129,7 @@ def test_skip_rows_for_nested_types(columns, list_struct_buff):
 def test_pyspark_struct(datadir):
     path = datadir / "TestOrcFile.testPySparkStruct.orc"
 
-    pdf = pa.orc.ORCFile(path).read().to_pandas()
+    pdf = pd.read_orc(path)
     gdf = cudf.read_orc(path)
 
     assert_eq(pdf, gdf)
@@ -1391,13 +1313,9 @@ def test_map_type_read(columns, num_rows, use_index):
 
 def test_orc_reader_decimal(datadir):
     path = datadir / "TestOrcFile.decimal.orc"
-    try:
-        orcfile = pa.orc.ORCFile(path)
-    except pa.ArrowIOError as e:
-        pytest.skip(".orc file is not found: %s" % e)
 
-    pdf = orcfile.read().to_pandas()
-    gdf = cudf.read_orc(path).to_pandas()
+    pdf = pd.read_orc(path)
+    gdf = cudf.read_orc(path)
 
     assert_eq(pdf, gdf)
 
@@ -1478,7 +1396,7 @@ def test_orc_writer_lists(data):
         buffer, stripe_size_rows=2048, row_index_stride=512
     )
 
-    pdf_out = pa.orc.ORCFile(buffer).read().to_pandas()
+    pdf_out = pd.read_orc(buffer)
     assert_eq(pdf_out, pdf_in)
 
 
@@ -1500,7 +1418,7 @@ def test_chunked_orc_writer_lists():
     writer.write_table(gdf)
     writer.close()
 
-    got = pa.orc.ORCFile(buffer).read().to_pandas()
+    got = pd.read_orc(buffer)
     assert_eq(expect, got)
 
 
@@ -1508,17 +1426,9 @@ def test_writer_timestamp_stream_size(datadir, tmpdir):
     pdf_fname = datadir / "TestOrcFile.largeTimestamps.orc"
     gdf_fname = tmpdir.join("gdf.orc")
 
-    try:
-        orcfile = pa.orc.ORCFile(pdf_fname)
-    except Exception as excpr:
-        if type(excpr).__name__ == "ArrowIOError":
-            pytest.skip(".orc file is not found")
-        else:
-            print(type(excpr).__name__)
-
-    expect = orcfile.read().to_pandas()
+    expect = pd.read_orc(pdf_fname)
     cudf.from_pandas(expect).to_orc(gdf_fname.strpath)
-    got = pa.orc.ORCFile(gdf_fname).read().to_pandas()
+    got = pd.read_orc(gdf_fname)
 
     assert_eq(expect, got)
 
@@ -1591,7 +1501,7 @@ def test_orc_writer_lists_empty_rg(data):
     df = cudf.read_orc(buffer)
     assert_eq(df, cudf_in)
 
-    pdf_out = pa.orc.ORCFile(buffer).read().to_pandas()
+    pdf_out = pd.read_orc(buffer)
     assert_eq(pdf_in, pdf_out)
 
 
@@ -1696,7 +1606,7 @@ def test_orc_writer_rle_stream_size(datadir, tmpdir):
 
     # Segfaults when RLE stream sizes don't account for varint length
     pa_out = pa.orc.ORCFile(reencoded).read()
-    assert_eq(df.to_pandas(), pa_out)
+    assert df.to_arrow().equals(pa_out)
 
 
 def test_empty_columns():
diff --git a/python/cudf/cudf/tests/test_s3.py b/python/cudf/cudf/tests/test_s3.py
index d2339930b91..de3bba25223 100644
--- a/python/cudf/cudf/tests/test_s3.py
+++ b/python/cudf/cudf/tests/test_s3.py
@@ -7,9 +7,7 @@
 
 import numpy as np
 import pandas as pd
-import pyarrow as pa
 import pyarrow.fs as pa_fs
-import pyarrow.orc
 import pytest
 from fsspec.core import get_fs_token_paths
 
@@ -442,7 +440,7 @@ def test_read_orc(s3_base, s3so, datadir, use_python_file_object, columns):
     source_file = str(datadir / "orc" / "TestOrcFile.testSnappy.orc")
     fname = "test_orc_reader.orc"
     bucket = "orc"
-    expect = pa.orc.ORCFile(source_file).read().to_pandas()
+    expect = pd.read_orc(source_file)
 
     with open(source_file, "rb") as f:
         buffer = f.read()
@@ -465,7 +463,7 @@ def test_read_orc_arrow_nativefile(s3_base, s3so, datadir, columns):
     source_file = str(datadir / "orc" / "TestOrcFile.testSnappy.orc")
     fname = "test_orc_reader.orc"
     bucket = "orc"
-    expect = pa.orc.ORCFile(source_file).read().to_pandas()
+    expect = pd.read_orc(source_file)
 
     with open(source_file, "rb") as f:
         buffer = f.read()
@@ -491,7 +489,7 @@ def test_write_orc(s3_base, s3so, pdf):
         assert s3fs.exists(f"s3://{bucket}/{fname}")
 
         with s3fs.open(f"s3://{bucket}/{fname}") as f:
-            got = pa.orc.ORCFile(f).read().to_pandas()
+            got = pd.read_orc(f)
 
     assert_eq(pdf, got)
 

From b3429fb2322debf6306acbbbf3bf14b0692e932a Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 10 Nov 2022 10:41:36 -0600
Subject: [PATCH 152/202] Remove "Multi-GPU with Dask-cuDF" notebook. (#12095)

This PR removes an outdated notebook for "Multi-GPU with Dask-cuDF" from the docs. Resolves #6583 with some of the changes from #6665.

See also: https://github.com/rapidsai/rapids.ai/pull/256#issuecomment-1307827640

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Richard (Rick) Zamora (https://github.com/rjzamora)

URL: https://github.com/rapidsai/cudf/pull/12095
---
 docs/cudf/source/user_guide/dask-cudf.md | 104 -----------------------
 docs/cudf/source/user_guide/index.md     |   1 -
 2 files changed, 105 deletions(-)
 delete mode 100644 docs/cudf/source/user_guide/dask-cudf.md

diff --git a/docs/cudf/source/user_guide/dask-cudf.md b/docs/cudf/source/user_guide/dask-cudf.md
deleted file mode 100644
index 2d829008ac9..00000000000
--- a/docs/cudf/source/user_guide/dask-cudf.md
+++ /dev/null
@@ -1,104 +0,0 @@
-# Multi-GPU with Dask-cuDF
-
-cuDF is a single-GPU library. For Multi-GPU cuDF solutions we use
-[Dask](https://dask.org/) and the [dask-cudf
-package](https://github.com/rapidsai/cudf/tree/main/python/dask_cudf),
-which is able to scale cuDF across multiple GPUs on a single machine,
-or multiple GPUs across many machines in a cluster.
-
-[Dask DataFrame](http://docs.dask.org/en/latest/dataframe.html) was
-originally designed to scale Pandas, orchestrating many Pandas
-DataFrames spread across many CPUs into a cohesive parallel DataFrame.
-Because cuDF currently implements only a subset of the Pandas API, not
-all Dask DataFrame operations work with cuDF.
-
-The following is tested and expected to work:
-
-## What works
-
-- Data ingestion
-
-  - `dask_cudf.read_csv`
-  - Use standard Dask ingestion with Pandas, then convert to cuDF (For
-    Parquet and other formats this is often decently fast)
-
-- Linear operations
-
-  - Element-wise operations: `df.x + df.y`, `df ** 2`
-  - Assignment: `df['z'] = df.x + df.y`
-  - Row-wise selections: `df[df.x > 0]`
-  - Loc: `df.loc['2001-01-01': '2005-02-02']`
-  - Date time/string accessors: `df.timestamp.dt.dayofweek`
-  - ... and most similar operations in this category that are already
-    implemented in cuDF
-
-- Reductions
-
-  - Like `sum`, `mean`, `max`, `count`, and so on on
-    `Series` objects
-  - Support for reductions on full dataframes
-  - `std`
-  - Custom reductions with
-    [dask.dataframe.reduction](https://docs.dask.org/en/latest/generated/dask.dataframe.Series.reduction.html)
-
-- Groupby aggregations
-
-  - On single columns: `df.groupby('x').y.max()`
-  - With custom aggregations:
-  - groupby standard deviation
-  - grouping on multiple columns
-  - groupby agg for multiple outputs
-
-- Joins:
-
-  - On full unsorted columns: `left.merge(right, on='id')`
-    (expensive)
-  - On sorted indexes:
-    `left.merge(right, left_index=True, right_index=True)` (fast)
-  - On large and small dataframes: `left.merge(cudf_df, on='id')`
-    (fast)
-
-- Rolling operations
-
-- Converting to and from other forms
-
-  - Dask + Pandas to Dask + cuDF
-    `df.map_partitions(cudf.DataFrame.from_pandas)`
-  - Dask + cuDF to Dask + Pandas
-    `df.map_partitions(lambda df: df.to_pandas())`
-  - cuDF to Dask + cuDF:
-    `dask.dataframe.from_pandas(df, npartitions=20)`
-  - Dask + cuDF to cuDF: `df.compute()`
-
-Additionally all generic Dask operations, like `compute`, `persist`,
-`visualize` and so on work regardless.
-
-## Developing the API
-
-Above we mention the following:
-
-> and most similar operations in this category that are already
-> implemented in cuDF
-
-This is because it is difficult to create a comprehensive list of
-operations in the cuDF and Pandas libraries. The API is large enough to
-be difficult to track effectively. For any operation that operates
-row-wise like `fillna` or `query` things will likely, but not
-certainly work. If operations don't work it is often due to a slight
-inconsistency between Pandas and cuDF that is generally easy to fix. We
-encourage users to look at the [cuDF issue
-tracker](https://github.com/rapidsai/cudf/issues) to see if their
-issue has already been reported and, if not, [raise a new
-issue](https://github.com/rapidsai/cudf/issues/new).
-
-## Navigating the API
-
-This project reuses the [Dask
-DataFrame](https://docs.dask.org/en/latest/dataframe.html) project,
-which was originally designed for Pandas, with the newer library cuDF.
-Because we use the same Dask classes for both projects there are often
-methods that are implemented for Pandas, but not yet for cuDF. As a
-result users looking at the full Dask DataFrame API can be misleading,
-and often lead to frustration when operations that are advertised in the
-Dask API do not work as expected with cuDF. We apologize for this in
-advance.
diff --git a/docs/cudf/source/user_guide/index.md b/docs/cudf/source/user_guide/index.md
index d99056f69f2..86168f0d81b 100644
--- a/docs/cudf/source/user_guide/index.md
+++ b/docs/cudf/source/user_guide/index.md
@@ -11,7 +11,6 @@ missing-data
 groupby
 guide-to-udfs
 cupy-interop
-dask-cudf
 options
 PandasCompat
 ```

From b30664b44559174e2f9fd4c1346120b0e2cb0e2e Mon Sep 17 00:00:00 2001
From: Gregory Kimball <gregory.kimball@sunpowercorp.com>
Date: Thu, 10 Nov 2022 12:36:58 -0800
Subject: [PATCH 153/202] Fix conditional_full_join benchmark (#12121)

The `CONDITIONAL_FULL_JOIN_BENCHMARK_DEFINE` benchmark category was mapped to `cudf::conditional_inner_join` instead of `cudf::conditional_full_join`.

Authors:
  - Gregory Kimball (https://github.com/GregoryKimball)

Approvers:
  - Robert Maynard (https://github.com/robertmaynard)
  - Divye Gala (https://github.com/divyegala)

URL: https://github.com/rapidsai/cudf/pull/12121
---
 cpp/benchmarks/join/conditional_join.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/benchmarks/join/conditional_join.cu b/cpp/benchmarks/join/conditional_join.cu
index 3c4208bf0fc..547367ffb69 100644
--- a/cpp/benchmarks/join/conditional_join.cu
+++ b/cpp/benchmarks/join/conditional_join.cu
@@ -70,7 +70,7 @@ CONDITIONAL_LEFT_JOIN_BENCHMARK_DEFINE(conditional_left_join_64bit_nulls, int64_
                    cudf::table_view const& right,                                      \
                    cudf::ast::operation binary_pred,                                   \
                    cudf::null_equality compare_nulls) {                                \
-      return cudf::conditional_inner_join(left, right, binary_pred);                   \
+      return cudf::conditional_full_join(left, right, binary_pred);                    \
     };                                                                                 \
     constexpr bool is_conditional = true;                                              \
     BM_join<key_type, payload_type, nullable, is_conditional>(st, join);               \

From 7f2a47175072ac930639b9b5a6c582bb3d3fb173 Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Thu, 10 Nov 2022 15:50:46 -0500
Subject: [PATCH 154/202] Fix regex working-memory-size refactor error (#12119)

Fixes an error in the `working_memory_size()` member function, which passed its parameters in the wrong order.
This was introduced in #11927 and found in the nightly compute-sanitizer check.
https://gpuci.gpuopenanalytics.com/job/rapidsai/job/gpuci/job/cudf/job/branches/job/cudf-gpu-build-branch-22.12/19/CUDA=11.5/testReport/junit/cudamemcheck/STRINGS_TEST/StringsContainsTests_ContainsTest/

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Jason Lowe (https://github.com/jlowe)
  - Yunsong Wang (https://github.com/PointKernel)
  - Nghia Truong (https://github.com/ttnghia)
  - Vukasin Milovanovic (https://github.com/vuule)

URL: https://github.com/rapidsai/cudf/pull/12119
---
 cpp/src/strings/regex/regexec.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cpp/src/strings/regex/regexec.cpp b/cpp/src/strings/regex/regexec.cpp
index 1c0a6869a2c..febad651f69 100644
--- a/cpp/src/strings/regex/regexec.cpp
+++ b/cpp/src/strings/regex/regexec.cpp
@@ -130,7 +130,7 @@ void reprog_device::destroy() { delete this; }
 
 std::size_t reprog_device::working_memory_size(int32_t num_threads) const
 {
-  return compute_working_memory_size(insts_counts(), num_threads);
+  return compute_working_memory_size(num_threads, insts_counts());
 }
 
 std::pair<std::size_t, int32_t> reprog_device::compute_strided_working_memory(

From 70c7b7a4fa2ce0636de8227d04db8fadf9190d86 Mon Sep 17 00:00:00 2001
From: Nghia Truong <nghiatruong.vn@gmail.com>
Date: Thu, 10 Nov 2022 13:55:05 -0800
Subject: [PATCH 155/202] Refactor Parquet reader (#12046)

This is a fairly involved refactor of the Parquet reader; no new features or changes in algorithms were made:
 * Rename some functions.
 * Move many declarations and definitions of functions/structs/classes around.
 * Extract some functions/structs/classes and put them into new files.
 * Rewrite doxygen for some functions.
 * Use aliases for member variables (to shorten their names), instead of passing them as function parameters.
 * Etc.

Note that this merely moves the current implementation around in preparation for adding the chunked Parquet reader, which is a fairly large implementation.

This is also a blocker for:
 * https://github.com/rapidsai/cudf/pull/11867
 * https://github.com/rapidsai/cudf/pull/11961

Authors:
  - Nghia Truong (https://github.com/ttnghia)
  - https://github.com/nvdbaranec

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Mike Wilson (https://github.com/hyperbolic2346)
  - Vukasin Milovanovic (https://github.com/vuule)

URL: https://github.com/rapidsai/cudf/pull/12046
---
 cpp/CMakeLists.txt                           |    5 +-
 cpp/include/cudf/io/detail/parquet.hpp       |   21 +-
 cpp/include/cudf/io/parquet.hpp              |    7 +-
 cpp/src/io/parquet/chunk_dict.cu             |    2 +-
 cpp/src/io/parquet/page_enc.cu               |    3 +-
 cpp/src/io/parquet/parquet_gpu.cuh           |   85 +
 cpp/src/io/parquet/parquet_gpu.hpp           |   77 +-
 cpp/src/io/parquet/reader.cpp                |   43 +
 cpp/src/io/parquet/reader_impl.cpp           |  312 +++
 cpp/src/io/parquet/reader_impl.cu            | 1855 ------------------
 cpp/src/io/parquet/reader_impl.hpp           |  157 +-
 cpp/src/io/parquet/reader_impl_helpers.cpp   |  629 ++++++
 cpp/src/io/parquet/reader_impl_helpers.hpp   |  197 ++
 cpp/src/io/parquet/reader_impl_preprocess.cu |  814 ++++++++
 cpp/src/io/parquet/writer_impl.cu            |    1 +
 15 files changed, 2179 insertions(+), 2029 deletions(-)
 create mode 100644 cpp/src/io/parquet/parquet_gpu.cuh
 create mode 100644 cpp/src/io/parquet/reader.cpp
 create mode 100644 cpp/src/io/parquet/reader_impl.cpp
 delete mode 100644 cpp/src/io/parquet/reader_impl.cu
 create mode 100644 cpp/src/io/parquet/reader_impl_helpers.cpp
 create mode 100644 cpp/src/io/parquet/reader_impl_helpers.hpp
 create mode 100644 cpp/src/io/parquet/reader_impl_preprocess.cu

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index a71eeb7cfbe..7e8ee5b60bf 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -346,7 +346,10 @@ add_library(
   src/io/parquet/chunk_dict.cu
   src/io/parquet/page_enc.cu
   src/io/parquet/page_hdr.cu
-  src/io/parquet/reader_impl.cu
+  src/io/parquet/reader.cpp
+  src/io/parquet/reader_impl.cpp
+  src/io/parquet/reader_impl_helpers.cpp
+  src/io/parquet/reader_impl_preprocess.cu
   src/io/parquet/writer_impl.cu
   src/io/statistics/orc_column_statistics.cu
   src/io/statistics/parquet_column_statistics.cu
diff --git a/cpp/include/cudf/io/detail/parquet.hpp b/cpp/include/cudf/io/detail/parquet.hpp
index 7675dc70cb2..8c7a7a21978 100644
--- a/cpp/include/cudf/io/detail/parquet.hpp
+++ b/cpp/include/cudf/io/detail/parquet.hpp
@@ -30,25 +30,28 @@
 #include <string>
 #include <vector>
 
-namespace cudf {
-namespace io {
+namespace cudf::io {
 
 // Forward declaration
 class parquet_reader_options;
 class parquet_writer_options;
 class chunked_parquet_writer_options;
 
-namespace detail {
-namespace parquet {
+namespace detail::parquet {
 
 /**
  * @brief Class to read Parquet dataset data into columns.
  */
 class reader {
- private:
+ protected:
   class impl;
   std::unique_ptr<impl> _impl;
 
+  /**
+   * @brief Default constructor, needed for subclassing.
+   */
+  reader();
+
  public:
   /**
    * @brief Constructor from an array of datasources
@@ -66,7 +69,7 @@ class reader {
   /**
    * @brief Destructor explicitly-declared to avoid inlined in header
    */
-  ~reader();
+  virtual ~reader();
 
   /**
    * @brief Reads the dataset as per given options.
@@ -154,7 +157,5 @@ class writer {
     const std::vector<std::unique_ptr<std::vector<uint8_t>>>& metadata_list);
 };
 
-};  // namespace parquet
-};  // namespace detail
-};  // namespace io
-};  // namespace cudf
+}  // namespace detail::parquet
+}  // namespace cudf::io
diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp
index ff5b9f5c457..c5425de308c 100644
--- a/cpp/include/cudf/io/parquet.hpp
+++ b/cpp/include/cudf/io/parquet.hpp
@@ -30,8 +30,7 @@
 #include <string>
 #include <vector>
 
-namespace cudf {
-namespace io {
+namespace cudf::io {
 /**
  * @addtogroup io_readers
  * @{
@@ -1452,5 +1451,5 @@ class parquet_chunked_writer {
 };
 
 /** @} */  // end of group
-}  // namespace io
-}  // namespace cudf
+
+}  // namespace cudf::io
diff --git a/cpp/src/io/parquet/chunk_dict.cu b/cpp/src/io/parquet/chunk_dict.cu
index 671e34ac73d..999cad76d5d 100644
--- a/cpp/src/io/parquet/chunk_dict.cu
+++ b/cpp/src/io/parquet/chunk_dict.cu
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include <io/parquet/parquet_gpu.hpp>
+#include "parquet_gpu.cuh"
 
 #include <cudf/detail/iterator.cuh>
 #include <cudf/detail/utilities/cuda.cuh>
diff --git a/cpp/src/io/parquet/page_enc.cu b/cpp/src/io/parquet/page_enc.cu
index 8a07ee419b4..74e98de4100 100644
--- a/cpp/src/io/parquet/page_enc.cu
+++ b/cpp/src/io/parquet/page_enc.cu
@@ -13,7 +13,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "parquet_gpu.hpp"
+
+#include "parquet_gpu.cuh"
 
 #include <io/utilities/block_utils.cuh>
 
diff --git a/cpp/src/io/parquet/parquet_gpu.cuh b/cpp/src/io/parquet/parquet_gpu.cuh
new file mode 100644
index 00000000000..793573b465e
--- /dev/null
+++ b/cpp/src/io/parquet/parquet_gpu.cuh
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "parquet_gpu.hpp"
+
+#include <cudf/lists/lists_column_device_view.cuh>
+#include <cudf/types.hpp>
+
+#include <cuco/static_map.cuh>
+
+namespace cudf::io::parquet::gpu {
+
+auto constexpr KEY_SENTINEL   = size_type{-1};
+auto constexpr VALUE_SENTINEL = size_type{-1};
+
+using map_type = cuco::static_map<size_type, size_type>;
+
+/**
+ * @brief The alias of `map_type::pair_atomic_type` class.
+ *
+ * Declare this struct by trivial subclassing instead of type aliasing so we can have forward
+ * declaration of this struct somewhere else.
+ */
+struct slot_type : public map_type::pair_atomic_type {
+};
+
+/**
+ * @brief Return the byte length of parquet dtypes that are physically represented by INT32
+ */
+inline uint32_t __device__ int32_logical_len(type_id id)
+{
+  switch (id) {
+    case cudf::type_id::INT8: [[fallthrough]];
+    case cudf::type_id::UINT8: return 1;
+    case cudf::type_id::INT16: [[fallthrough]];
+    case cudf::type_id::UINT16: return 2;
+    case cudf::type_id::DURATION_SECONDS: [[fallthrough]];
+    case cudf::type_id::DURATION_MILLISECONDS: return 8;
+    default: return 4;
+  }
+}
+
+/**
+ * @brief Translate the row index of a parent column_device_view into the index of the first value
+ * in the leaf child.
+ * Only works in the context of parquet writer where struct columns are previously modified s.t.
+ * they only have one immediate child.
+ */
+inline size_type __device__ row_to_value_idx(size_type idx,
+                                             parquet_column_device_view const& parquet_col)
+{
+  // with a byte array, we can't go all the way down to the leaf node, but instead we want to leave
+  // the size at the parent level because we are writing out parent row byte arrays.
+  auto col = *parquet_col.parent_column;
+  while (col.type().id() == type_id::LIST or col.type().id() == type_id::STRUCT) {
+    if (col.type().id() == type_id::STRUCT) {
+      idx += col.offset();
+      col = col.child(0);
+    } else {
+      auto list_col = cudf::detail::lists_column_device_view(col);
+      auto child    = list_col.child();
+      if (parquet_col.output_as_byte_array && child.type().id() == type_id::UINT8) { break; }
+      idx = list_col.offset_at(idx);
+      col = child;
+    }
+  }
+  return idx;
+}
+
+}  // namespace cudf::io::parquet::gpu
diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp
index ea3678129ac..7849e05eb68 100644
--- a/cpp/src/io/parquet/parquet_gpu.hpp
+++ b/cpp/src/io/parquet/parquet_gpu.hpp
@@ -23,14 +23,10 @@
 #include "io/utilities/column_buffer.hpp"
 #include "io/utilities/hostdevice_vector.hpp"
 
-#include <cudf/column/column_device_view.cuh>
-#include <cudf/lists/lists_column_device_view.cuh>
-#include <cudf/table/table_device_view.cuh>
+#include <cudf/io/datasource.hpp>
 #include <cudf/types.hpp>
 #include <cudf/utilities/span.hpp>
 
-#include <cuco/static_map.cuh>
-
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_scalar.hpp>
 #include <rmm/device_uvector.hpp>
@@ -39,9 +35,7 @@
 
 #include <vector>
 
-namespace cudf {
-namespace io {
-namespace parquet {
+namespace cudf::io::parquet {
 
 using cudf::io::detail::string_index_pair;
 
@@ -72,11 +66,6 @@ struct input_column_info {
 
 namespace gpu {
 
-auto constexpr KEY_SENTINEL   = size_type{-1};
-auto constexpr VALUE_SENTINEL = size_type{-1};
-using map_type                = cuco::static_map<size_type, size_type>;
-using slot_type               = map_type::pair_atomic_type;
-
 /**
  * @brief Enums for the flags in the page header
  */
@@ -108,7 +97,8 @@ struct PageNestingInfo {
   int32_t max_rep_level;
 
   // set during preprocessing
-  int32_t size;              // this page/nesting-level's size contribution to the output column
+  int32_t size;  // this page/nesting-level's row count contribution to the output column, if fully
+                 // decoded
   int32_t page_start_value;  // absolute output start index in output column data
 
   // set during data decoding
@@ -247,6 +237,17 @@ struct ColumnChunkDesc {
   int32_t src_col_schema;  // my schema index in the file
 };
 
+/**
+ * @brief The struct to store raw/intermediate file data before parsing.
+ */
+struct file_intermediate_data {
+  std::vector<std::unique_ptr<datasource::buffer>> raw_page_data;
+  rmm::device_buffer decomp_page_data;
+  hostdevice_vector<gpu::ColumnChunkDesc> chunks{};
+  hostdevice_vector<gpu::PageInfo> pages_info{};
+  hostdevice_vector<gpu::PageNestingInfo> page_nesting_info{};
+};
+
 /**
  * @brief Struct describing an encoder column
  */
@@ -293,50 +294,8 @@ struct PageFragment {
 constexpr unsigned int kDictHashBits = 16;
 constexpr size_t kDictScratchSize    = (1 << kDictHashBits) * sizeof(uint32_t);
 
-/**
- * @brief Return the byte length of parquet dtypes that are physically represented by INT32
- */
-inline uint32_t __device__ int32_logical_len(type_id id)
-{
-  switch (id) {
-    case cudf::type_id::INT8: [[fallthrough]];
-    case cudf::type_id::UINT8: return 1;
-    case cudf::type_id::INT16: [[fallthrough]];
-    case cudf::type_id::UINT16: return 2;
-    case cudf::type_id::DURATION_SECONDS: [[fallthrough]];
-    case cudf::type_id::DURATION_MILLISECONDS: return 8;
-    default: return 4;
-  }
-}
-
-/**
- * @brief Translate the row index of a parent column_device_view into the index of the first value
- * in the leaf child.
- * Only works in the context of parquet writer where struct columns are previously modified s.t.
- * they only have one immediate child.
- */
-inline size_type __device__ row_to_value_idx(size_type idx,
-                                             parquet_column_device_view const& parquet_col)
-{
-  // with a byte array, we can't go all the way down to the leaf node, but instead we want to leave
-  // the size at the parent level because we are writing out parent row byte arrays.
-  auto col = *parquet_col.parent_column;
-  while (col.type().id() == type_id::LIST or col.type().id() == type_id::STRUCT) {
-    if (col.type().id() == type_id::STRUCT) {
-      idx += col.offset();
-      col = col.child(0);
-    } else {
-      auto list_col = cudf::detail::lists_column_device_view(col);
-      auto child    = list_col.child();
-      if (parquet_col.output_as_byte_array && child.type().id() == type_id::UINT8) { break; }
-      idx = list_col.offset_at(idx);
-      col = child;
-    }
-  }
-  return idx;
-}
-
 struct EncPage;
+struct slot_type;
 
 /**
  * @brief Struct describing an encoder column chunk
@@ -630,6 +589,4 @@ void EncodeColumnIndexes(device_span<EncColumnChunk> chunks,
                          rmm::cuda_stream_view stream);
 
 }  // namespace gpu
-}  // namespace parquet
-}  // namespace io
-}  // namespace cudf
+}  // namespace cudf::io::parquet
diff --git a/cpp/src/io/parquet/reader.cpp b/cpp/src/io/parquet/reader.cpp
new file mode 100644
index 00000000000..6be6987b7cb
--- /dev/null
+++ b/cpp/src/io/parquet/reader.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "reader_impl.hpp"
+
+namespace cudf::io::detail::parquet {
+
+reader::reader() = default;
+
+reader::reader(std::vector<std::unique_ptr<datasource>>&& sources,
+               parquet_reader_options const& options,
+               rmm::cuda_stream_view stream,
+               rmm::mr::device_memory_resource* mr)
+  : _impl(std::make_unique<impl>(std::move(sources), options, stream, mr))
+{
+}
+
+reader::~reader() = default;
+
+table_with_metadata reader::read(parquet_reader_options const& options)
+{
+  // if the user has specified custom row bounds
+  bool const uses_custom_row_bounds = options.get_num_rows() >= 0 || options.get_skip_rows() != 0;
+  return _impl->read(options.get_skip_rows(),
+                     options.get_num_rows(),
+                     uses_custom_row_bounds,
+                     options.get_row_groups());
+}
+
+}  // namespace cudf::io::detail::parquet
diff --git a/cpp/src/io/parquet/reader_impl.cpp b/cpp/src/io/parquet/reader_impl.cpp
new file mode 100644
index 00000000000..a61f63f6645
--- /dev/null
+++ b/cpp/src/io/parquet/reader_impl.cpp
@@ -0,0 +1,312 @@
+/*
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "reader_impl.hpp"
+
+#include <cudf/detail/utilities/vector_factories.hpp>
+
+#include <numeric>
+
+namespace cudf::io::detail::parquet {
+
+void reader::impl::decode_page_data(size_t skip_rows, size_t num_rows)
+{
+  auto& chunks       = _file_itm_data.chunks;
+  auto& pages        = _file_itm_data.pages_info;
+  auto& page_nesting = _file_itm_data.page_nesting_info;
+
+  auto is_dict_chunk = [](const gpu::ColumnChunkDesc& chunk) {
+    return (chunk.data_type & 0x7) == BYTE_ARRAY && chunk.num_dict_pages > 0;
+  };
+
+  // Count the number of string dictionary entries
+  // NOTE: Assumes first page in the chunk is always the dictionary page
+  size_t total_str_dict_indexes = 0;
+  for (size_t c = 0, page_count = 0; c < chunks.size(); c++) {
+    if (is_dict_chunk(chunks[c])) { total_str_dict_indexes += pages[page_count].num_input_values; }
+    page_count += chunks[c].max_num_pages;
+  }
+
+  // Build index for string dictionaries since they can't be indexed
+  // directly due to variable-sized elements
+  auto str_dict_index = cudf::detail::make_zeroed_device_uvector_async<string_index_pair>(
+    total_str_dict_indexes, _stream);
+
+  size_t const sum_max_depths = std::accumulate(
+    chunks.begin(), chunks.end(), 0, [&](size_t cursum, gpu::ColumnChunkDesc const& chunk) {
+      return cursum + _metadata->get_output_nesting_depth(chunk.src_col_schema);
+    });
+
+  // In order to reduce the number of allocations of hostdevice_vector, we allocate a single vector
+  // to store all per-chunk pointers to nested data/nullmask. `chunk_offsets[i]` will store the
+  // offset into `chunk_nested_data`/`chunk_nested_valids` for the array of pointers for chunk `i`
+  auto chunk_nested_valids = hostdevice_vector<uint32_t*>(sum_max_depths, _stream);
+  auto chunk_nested_data   = hostdevice_vector<void*>(sum_max_depths, _stream);
+  auto chunk_offsets       = std::vector<size_t>();
+
+  // Update chunks with pointers to column data.
+  for (size_t c = 0, page_count = 0, str_ofs = 0, chunk_off = 0; c < chunks.size(); c++) {
+    input_column_info const& input_col = _input_columns[chunks[c].src_col_index];
+    CUDF_EXPECTS(input_col.schema_idx == chunks[c].src_col_schema,
+                 "Column/page schema index mismatch");
+
+    if (is_dict_chunk(chunks[c])) {
+      chunks[c].str_dict_index = str_dict_index.data() + str_ofs;
+      str_ofs += pages[page_count].num_input_values;
+    }
+
+    size_t max_depth = _metadata->get_output_nesting_depth(chunks[c].src_col_schema);
+    chunk_offsets.push_back(chunk_off);
+
+    // get a slice of size `nesting depth` from `chunk_nested_valids` to store an array of pointers
+    // to validity data
+    auto valids              = chunk_nested_valids.host_ptr(chunk_off);
+    chunks[c].valid_map_base = chunk_nested_valids.device_ptr(chunk_off);
+
+    // get a slice of size `nesting depth` from `chunk_nested_data` to store an array of pointers to
+    // out data
+    auto data                  = chunk_nested_data.host_ptr(chunk_off);
+    chunks[c].column_data_base = chunk_nested_data.device_ptr(chunk_off);
+
+    chunk_off += max_depth;
+
+    // fill in the arrays on the host.  there are some important considerations to
+    // take into account here for nested columns.  specifically, with structs
+    // there is sharing of output buffers between input columns.  consider this schema
+    //
+    //  required group field_id=1 name {
+    //    required binary field_id=2 firstname (String);
+    //    required binary field_id=3 middlename (String);
+    //    required binary field_id=4 lastname (String);
+    // }
+    //
+    // there are 3 input columns of data here (firstname, middlename, lastname), but
+    // only 1 output column (name).  The structure of the output column buffers looks like
+    // the schema itself
+    //
+    // struct      (name)
+    //     string  (firstname)
+    //     string  (middlename)
+    //     string  (lastname)
+    //
+    // The struct column can contain validity information. the problem is, the decode
+    // step for the input columns will all attempt to decode this validity information
+    // because each one has it's own copy of the repetition/definition levels. but
+    // since this is all happening in parallel it would mean multiple blocks would
+    // be stomping all over the same memory randomly.  to work around this, we set
+    // things up so that only 1 child of any given nesting level fills in the
+    // data (offsets in the case of lists) or validity information for the higher
+    // levels of the hierarchy that are shared.  In this case, it would mean we
+    // would just choose firstname to be the one that decodes the validity for name.
+    //
+    // we do this by only handing out the pointers to the first child we come across.
+    //
+    auto* cols = &_output_buffers;
+    for (size_t idx = 0; idx < max_depth; idx++) {
+      auto& out_buf = (*cols)[input_col.nesting[idx]];
+      cols          = &out_buf.children;
+
+      int owning_schema = out_buf.user_data & PARQUET_COLUMN_BUFFER_SCHEMA_MASK;
+      if (owning_schema == 0 || owning_schema == input_col.schema_idx) {
+        valids[idx] = out_buf.null_mask();
+        data[idx]   = out_buf.data();
+        out_buf.user_data |=
+          static_cast<uint32_t>(input_col.schema_idx) & PARQUET_COLUMN_BUFFER_SCHEMA_MASK;
+      } else {
+        valids[idx] = nullptr;
+        data[idx]   = nullptr;
+      }
+    }
+
+    // column_data_base will always point to leaf data, even for nested types.
+    page_count += chunks[c].max_num_pages;
+  }
+
+  chunks.host_to_device(_stream);
+  chunk_nested_valids.host_to_device(_stream);
+  chunk_nested_data.host_to_device(_stream);
+
+  if (total_str_dict_indexes > 0) {
+    gpu::BuildStringDictionaryIndex(chunks.device_ptr(), chunks.size(), _stream);
+  }
+
+  gpu::DecodePageData(pages, chunks, num_rows, skip_rows, _stream);
+  pages.device_to_host(_stream);
+  page_nesting.device_to_host(_stream);
+  _stream.synchronize();
+
+  // for list columns, add the final offset to every offset buffer.
+  // TODO : make this happen in more efficiently. Maybe use thrust::for_each
+  // on each buffer.  Or potentially do it in PreprocessColumnData
+  // Note : the reason we are doing this here instead of in the decode kernel is
+  // that it is difficult/impossible for a given page to know that it is writing the very
+  // last value that should then be followed by a terminator (because rows can span
+  // page boundaries).
+  for (size_t idx = 0; idx < _input_columns.size(); idx++) {
+    input_column_info const& input_col = _input_columns[idx];
+
+    auto* cols = &_output_buffers;
+    for (size_t l_idx = 0; l_idx < input_col.nesting_depth(); l_idx++) {
+      auto& out_buf = (*cols)[input_col.nesting[l_idx]];
+      cols          = &out_buf.children;
+
+      if (out_buf.type.id() != type_id::LIST ||
+          (out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_LIST_TERMINATED)) {
+        continue;
+      }
+      CUDF_EXPECTS(l_idx < input_col.nesting_depth() - 1, "Encountered a leaf list column");
+      auto& child = (*cols)[input_col.nesting[l_idx + 1]];
+
+      // the final offset for a list at level N is the size of it's child
+      int offset = child.type.id() == type_id::LIST ? child.size - 1 : child.size;
+      cudaMemcpyAsync(static_cast<int32_t*>(out_buf.data()) + (out_buf.size - 1),
+                      &offset,
+                      sizeof(offset),
+                      cudaMemcpyHostToDevice,
+                      _stream.value());
+      out_buf.user_data |= PARQUET_COLUMN_BUFFER_FLAG_LIST_TERMINATED;
+    }
+  }
+
+  // update null counts in the final column buffers
+  for (size_t idx = 0; idx < pages.size(); idx++) {
+    gpu::PageInfo* pi = &pages[idx];
+    if (pi->flags & gpu::PAGEINFO_FLAGS_DICTIONARY) { continue; }
+    gpu::ColumnChunkDesc* col          = &chunks[pi->chunk_idx];
+    input_column_info const& input_col = _input_columns[col->src_col_index];
+
+    int index                 = pi->nesting - page_nesting.device_ptr();
+    gpu::PageNestingInfo* pni = &page_nesting[index];
+
+    auto* cols = &_output_buffers;
+    for (size_t l_idx = 0; l_idx < input_col.nesting_depth(); l_idx++) {
+      auto& out_buf = (*cols)[input_col.nesting[l_idx]];
+      cols          = &out_buf.children;
+
+      // if I wasn't the one who wrote out the validity bits, skip it
+      if (chunk_nested_valids.host_ptr(chunk_offsets[pi->chunk_idx])[l_idx] == nullptr) {
+        continue;
+      }
+      out_buf.null_count() += pni[l_idx].null_count;
+    }
+  }
+
+  _stream.synchronize();
+}
+
+reader::impl::impl(std::vector<std::unique_ptr<datasource>>&& sources,
+                   parquet_reader_options const& options,
+                   rmm::cuda_stream_view stream,
+                   rmm::mr::device_memory_resource* mr)
+  : _stream(stream), _mr(mr), _sources(std::move(sources))
+{
+  // Open and parse the source dataset metadata
+  _metadata = std::make_unique<aggregate_reader_metadata>(_sources);
+
+  // Override output timestamp resolution if requested
+  if (options.get_timestamp_type().id() != type_id::EMPTY) {
+    _timestamp_type = options.get_timestamp_type();
+  }
+
+  // Strings may be returned as either string or categorical columns
+  _strings_to_categorical = options.is_enabled_convert_strings_to_categories();
+
+  // Binary columns can be read as binary or strings
+  _reader_column_schema = options.get_column_schema();
+
+  // Select only columns required by the options
+  std::tie(_input_columns, _output_buffers, _output_column_schemas) =
+    _metadata->select_columns(options.get_columns(),
+                              options.is_enabled_use_pandas_metadata(),
+                              _strings_to_categorical,
+                              _timestamp_type.id());
+}
+
+void reader::impl::prepare_data(size_type skip_rows,
+                                size_type num_rows,
+                                bool uses_custom_row_bounds,
+                                host_span<std::vector<size_type> const> row_group_indices)
+{
+  const auto [skip_rows_corrected, num_rows_corrected, row_groups_info] =
+    _metadata->select_row_groups(row_group_indices, skip_rows, num_rows);
+  _skip_rows = skip_rows_corrected;
+  _num_rows  = num_rows_corrected;
+
+  if (num_rows_corrected > 0 && row_groups_info.size() != 0 && _input_columns.size() != 0) {
+    load_and_decompress_data(row_groups_info, num_rows_corrected);
+  }
+}
+
+table_with_metadata reader::impl::read_chunk_internal(bool uses_custom_row_bounds)
+{
+  auto out_metadata = table_metadata{};
+
+  // output cudf columns as determined by the top level schema
+  auto out_columns = std::vector<std::unique_ptr<column>>{};
+  out_columns.reserve(_output_buffers.size());
+
+  if (_num_rows == 0) { return finalize_output(out_metadata, out_columns); }
+
+  allocate_columns(_skip_rows, _num_rows, uses_custom_row_bounds);
+
+  decode_page_data(_skip_rows, _num_rows);
+
+  // Create the final output cudf columns
+  for (size_t i = 0; i < _output_buffers.size(); ++i) {
+    auto const metadata        = _reader_column_schema.has_value()
+                                   ? std::make_optional<reader_column_schema>((*_reader_column_schema)[i])
+                                   : std::nullopt;
+    column_name_info& col_name = out_metadata.schema_info.emplace_back("");
+    out_columns.emplace_back(make_column(_output_buffers[i], &col_name, metadata, _stream, _mr));
+  }
+
+  return finalize_output(out_metadata, out_columns);
+}
+
+table_with_metadata reader::impl::finalize_output(table_metadata& out_metadata,
+                                                  std::vector<std::unique_ptr<column>>& out_columns)
+{
+  // Create empty columns as needed (this can happen if we've ended up with no actual data to read)
+  for (size_t i = out_columns.size(); i < _output_buffers.size(); ++i) {
+    column_name_info& col_name = out_metadata.schema_info.emplace_back("");
+    out_columns.emplace_back(io::detail::empty_like(_output_buffers[i], &col_name, _stream, _mr));
+  }
+
+  // Return column names (must match order of returned columns)
+  out_metadata.column_names.resize(_output_buffers.size());
+  for (size_t i = 0; i < _output_column_schemas.size(); i++) {
+    auto const& schema           = _metadata->get_schema(_output_column_schemas[i]);
+    out_metadata.column_names[i] = schema.name;
+  }
+
+  // Return user metadata
+  out_metadata.per_file_user_data = _metadata->get_key_value_metadata();
+  out_metadata.user_data          = {out_metadata.per_file_user_data[0].begin(),
+                            out_metadata.per_file_user_data[0].end()};
+
+  return {std::make_unique<table>(std::move(out_columns)), std::move(out_metadata)};
+}
+
+table_with_metadata reader::impl::read(size_type skip_rows,
+                                       size_type num_rows,
+                                       bool uses_custom_row_bounds,
+                                       host_span<std::vector<size_type> const> row_group_indices)
+{
+  prepare_data(skip_rows, num_rows, uses_custom_row_bounds, row_group_indices);
+  return read_chunk_internal(uses_custom_row_bounds);
+}
+
+}  // namespace cudf::io::detail::parquet
diff --git a/cpp/src/io/parquet/reader_impl.cu b/cpp/src/io/parquet/reader_impl.cu
deleted file mode 100644
index 50893ebe583..00000000000
--- a/cpp/src/io/parquet/reader_impl.cu
+++ /dev/null
@@ -1,1855 +0,0 @@
-/*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * @file reader_impl.cu
- * @brief cuDF-IO Parquet reader class implementation
- */
-
-#include "reader_impl.hpp"
-
-#include "compact_protocol_reader.hpp"
-
-#include <io/comp/gpuinflate.hpp>
-#include <io/comp/nvcomp_adapter.hpp>
-#include <io/utilities/config_utils.hpp>
-#include <io/utilities/time_utils.cuh>
-
-#include <cudf/detail/utilities/integer_utils.hpp>
-#include <cudf/detail/utilities/vector_factories.hpp>
-#include <cudf/table/table.hpp>
-#include <cudf/utilities/error.hpp>
-#include <cudf/utilities/traits.hpp>
-
-#include <rmm/cuda_stream_view.hpp>
-#include <rmm/device_buffer.hpp>
-#include <rmm/device_uvector.hpp>
-#include <rmm/exec_policy.hpp>
-
-#include <thrust/fill.h>
-#include <thrust/for_each.h>
-#include <thrust/iterator/zip_iterator.h>
-#include <thrust/logical.h>
-#include <thrust/transform.h>
-#include <thrust/tuple.h>
-
-#include <algorithm>
-#include <array>
-#include <numeric>
-#include <regex>
-
-namespace cudf {
-namespace io {
-namespace detail {
-namespace parquet {
-// Import functionality that's independent of legacy code
-using namespace cudf::io::parquet;
-using namespace cudf::io;
-
-namespace {
-
-parquet::ConvertedType logical_type_to_converted_type(parquet::LogicalType const& logical)
-{
-  if (logical.isset.STRING) {
-    return parquet::UTF8;
-  } else if (logical.isset.MAP) {
-    return parquet::MAP;
-  } else if (logical.isset.LIST) {
-    return parquet::LIST;
-  } else if (logical.isset.ENUM) {
-    return parquet::ENUM;
-  } else if (logical.isset.DECIMAL) {
-    return parquet::DECIMAL;  // TODO set decimal values
-  } else if (logical.isset.DATE) {
-    return parquet::DATE;
-  } else if (logical.isset.TIME) {
-    if (logical.TIME.unit.isset.MILLIS)
-      return parquet::TIME_MILLIS;
-    else if (logical.TIME.unit.isset.MICROS)
-      return parquet::TIME_MICROS;
-  } else if (logical.isset.TIMESTAMP) {
-    if (logical.TIMESTAMP.unit.isset.MILLIS)
-      return parquet::TIMESTAMP_MILLIS;
-    else if (logical.TIMESTAMP.unit.isset.MICROS)
-      return parquet::TIMESTAMP_MICROS;
-  } else if (logical.isset.INTEGER) {
-    switch (logical.INTEGER.bitWidth) {
-      case 8: return logical.INTEGER.isSigned ? INT_8 : UINT_8;
-      case 16: return logical.INTEGER.isSigned ? INT_16 : UINT_16;
-      case 32: return logical.INTEGER.isSigned ? INT_32 : UINT_32;
-      case 64: return logical.INTEGER.isSigned ? INT_64 : UINT_64;
-      default: break;
-    }
-  } else if (logical.isset.UNKNOWN) {
-    return parquet::NA;
-  } else if (logical.isset.JSON) {
-    return parquet::JSON;
-  } else if (logical.isset.BSON) {
-    return parquet::BSON;
-  }
-  return parquet::UNKNOWN;
-}
-
-/**
- * @brief Function that translates Parquet datatype to cuDF type enum
- */
-type_id to_type_id(SchemaElement const& schema,
-                   bool strings_to_categorical,
-                   type_id timestamp_type_id)
-{
-  parquet::Type const physical            = schema.type;
-  parquet::LogicalType const logical_type = schema.logical_type;
-  parquet::ConvertedType converted_type   = schema.converted_type;
-  int32_t decimal_scale                   = schema.decimal_scale;
-
-  // Logical type used for actual data interpretation; the legacy converted type
-  // is superceded by 'logical' type whenever available.
-  auto const inferred_converted_type = logical_type_to_converted_type(logical_type);
-  if (inferred_converted_type != parquet::UNKNOWN) converted_type = inferred_converted_type;
-  if (inferred_converted_type == parquet::DECIMAL && decimal_scale == 0)
-    decimal_scale = schema.logical_type.DECIMAL.scale;
-
-  switch (converted_type) {
-    case parquet::UINT_8: return type_id::UINT8;
-    case parquet::INT_8: return type_id::INT8;
-    case parquet::UINT_16: return type_id::UINT16;
-    case parquet::INT_16: return type_id::INT16;
-    case parquet::UINT_32: return type_id::UINT32;
-    case parquet::UINT_64: return type_id::UINT64;
-    case parquet::DATE: return type_id::TIMESTAMP_DAYS;
-    case parquet::TIME_MILLIS: return type_id::DURATION_MILLISECONDS;
-    case parquet::TIME_MICROS: return type_id::DURATION_MICROSECONDS;
-    case parquet::TIMESTAMP_MILLIS:
-      return (timestamp_type_id != type_id::EMPTY) ? timestamp_type_id
-                                                   : type_id::TIMESTAMP_MILLISECONDS;
-    case parquet::TIMESTAMP_MICROS:
-      return (timestamp_type_id != type_id::EMPTY) ? timestamp_type_id
-                                                   : type_id::TIMESTAMP_MICROSECONDS;
-    case parquet::DECIMAL:
-      if (physical == parquet::INT32) { return type_id::DECIMAL32; }
-      if (physical == parquet::INT64) { return type_id::DECIMAL64; }
-      if (physical == parquet::FIXED_LEN_BYTE_ARRAY) {
-        if (schema.type_length <= static_cast<int32_t>(sizeof(int32_t))) {
-          return type_id::DECIMAL32;
-        }
-        if (schema.type_length <= static_cast<int32_t>(sizeof(int64_t))) {
-          return type_id::DECIMAL64;
-        }
-        if (schema.type_length <= static_cast<int32_t>(sizeof(__int128_t))) {
-          return type_id::DECIMAL128;
-        }
-      }
-      CUDF_FAIL("Invalid representation of decimal type");
-      break;
-
-    // maps are just List<Struct<>>.
-    case parquet::MAP:
-    case parquet::LIST: return type_id::LIST;
-    case parquet::NA: return type_id::STRING;
-    // return type_id::EMPTY; //TODO(kn): enable after Null/Empty column support
-    default: break;
-  }
-
-  if (inferred_converted_type == parquet::UNKNOWN and physical == parquet::INT64 and
-      logical_type.TIMESTAMP.unit.isset.NANOS) {
-    return (timestamp_type_id != type_id::EMPTY) ? timestamp_type_id
-                                                 : type_id::TIMESTAMP_NANOSECONDS;
-  }
-
-  if (inferred_converted_type == parquet::UNKNOWN and physical == parquet::INT64 and
-      logical_type.TIME.unit.isset.NANOS) {
-    return type_id::DURATION_NANOSECONDS;
-  }
-
-  // is it simply a struct?
-  if (schema.is_struct()) { return type_id::STRUCT; }
-
-  // Physical storage type supported by Parquet; controls the on-disk storage
-  // format in combination with the encoding type.
-  switch (physical) {
-    case parquet::BOOLEAN: return type_id::BOOL8;
-    case parquet::INT32: return type_id::INT32;
-    case parquet::INT64: return type_id::INT64;
-    case parquet::FLOAT: return type_id::FLOAT32;
-    case parquet::DOUBLE: return type_id::FLOAT64;
-    case parquet::BYTE_ARRAY:
-    case parquet::FIXED_LEN_BYTE_ARRAY:
-      // Can be mapped to INT32 (32-bit hash) or STRING
-      return strings_to_categorical ? type_id::INT32 : type_id::STRING;
-    case parquet::INT96:
-      return (timestamp_type_id != type_id::EMPTY) ? timestamp_type_id
-                                                   : type_id::TIMESTAMP_NANOSECONDS;
-    default: break;
-  }
-
-  return type_id::EMPTY;
-}
-
-/**
- * @brief Converts cuDF type enum to column logical type
- */
-data_type to_data_type(type_id t_id, SchemaElement const& schema)
-{
-  return t_id == type_id::DECIMAL32 || t_id == type_id::DECIMAL64 || t_id == type_id::DECIMAL128
-           ? data_type{t_id, numeric::scale_type{-schema.decimal_scale}}
-           : data_type{t_id};
-}
-
-/**
- * @brief Function that returns the required the number of bits to store a value
- */
-template <typename T = uint8_t>
-T required_bits(uint32_t max_level)
-{
-  return static_cast<T>(CompactProtocolReader::NumRequiredBits(max_level));
-}
-
-/**
- * @brief Converts cuDF units to Parquet units.
- *
- * @return A tuple of Parquet type width, Parquet clock rate and Parquet decimal type.
- */
-std::tuple<int32_t, int32_t, int8_t> conversion_info(type_id column_type_id,
-                                                     type_id timestamp_type_id,
-                                                     parquet::Type physical,
-                                                     int8_t converted,
-                                                     int32_t length)
-{
-  int32_t type_width = (physical == parquet::FIXED_LEN_BYTE_ARRAY) ? length : 0;
-  int32_t clock_rate = 0;
-  if (column_type_id == type_id::INT8 or column_type_id == type_id::UINT8) {
-    type_width = 1;  // I32 -> I8
-  } else if (column_type_id == type_id::INT16 or column_type_id == type_id::UINT16) {
-    type_width = 2;  // I32 -> I16
-  } else if (column_type_id == type_id::INT32) {
-    type_width = 4;  // str -> hash32
-  } else if (is_chrono(data_type{column_type_id})) {
-    clock_rate = to_clockrate(timestamp_type_id);
-  }
-
-  int8_t converted_type = converted;
-  if (converted_type == parquet::DECIMAL && column_type_id != type_id::FLOAT64 &&
-      not cudf::is_fixed_point(data_type{column_type_id})) {
-    converted_type = parquet::UNKNOWN;  // Not converting to float64 or decimal
-  }
-  return std::make_tuple(type_width, clock_rate, converted_type);
-}
-
-inline void decompress_check(device_span<compression_result const> results,
-                             rmm::cuda_stream_view stream)
-{
-  CUDF_EXPECTS(thrust::all_of(rmm::exec_policy(stream),
-                              results.begin(),
-                              results.end(),
-                              [] __device__(auto const& res) {
-                                return res.status == compression_status::SUCCESS;
-                              }),
-               "Error during decompression");
-}
-}  // namespace
-
-std::string name_from_path(const std::vector<std::string>& path_in_schema)
-{
-  // For the case of lists, we will see a schema that looks like:
-  // a.list.element.list.element
-  // where each (list.item) pair represents a level of nesting.  According to the parquet spec,
-  // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md
-  // the initial field must be named "list" and the inner element must be named "element".
-  // If we are dealing with a list, we want to return the topmost name of the group ("a").
-  //
-  // For other nested schemas, like structs we just want to return the bottom-most name. For
-  // example a struct with the schema
-  // b.employee.id,  the column representing "id" should simply be named "id".
-  //
-  // In short, this means : return the highest level of the schema that does not have list
-  // definitions underneath it.
-  //
-  std::string s = (path_in_schema.size() > 0) ? path_in_schema[0] : "";
-  for (size_t i = 1; i < path_in_schema.size(); i++) {
-    // The Parquet spec requires that the outer schema field is named "list". However it also
-    // provides a list of backwards compatibility cases that are applicable as well.  Currently
-    // we are only handling the formal spec.  This will get cleaned up and improved when we add
-    // support for structs. The correct thing to do will probably be to examine the type of
-    // the SchemaElement itself to concretely identify the start of a nested type of any kind rather
-    // than trying to derive it from the path string.
-    if (path_in_schema[i] == "list") {
-      // Again, strictly speaking, the Parquet spec says the inner field should be named
-      // "element", but there are some backwards compatibility issues that we have seen in the
-      // wild. For example, Pandas calls the field "item".  We will allow any name for now.
-      i++;
-      continue;
-    }
-    // otherwise, we've got a real nested column. update the name
-    s = path_in_schema[i];
-  }
-  return s;
-}
-
-/**
- * @brief Class for parsing dataset metadata
- */
-struct metadata : public FileMetaData {
-  explicit metadata(datasource* source)
-  {
-    constexpr auto header_len = sizeof(file_header_s);
-    constexpr auto ender_len  = sizeof(file_ender_s);
-
-    const auto len           = source->size();
-    const auto header_buffer = source->host_read(0, header_len);
-    const auto header        = reinterpret_cast<const file_header_s*>(header_buffer->data());
-    const auto ender_buffer  = source->host_read(len - ender_len, ender_len);
-    const auto ender         = reinterpret_cast<const file_ender_s*>(ender_buffer->data());
-    CUDF_EXPECTS(len > header_len + ender_len, "Incorrect data source");
-    CUDF_EXPECTS(header->magic == parquet_magic && ender->magic == parquet_magic,
-                 "Corrupted header or footer");
-    CUDF_EXPECTS(ender->footer_len != 0 && ender->footer_len <= (len - header_len - ender_len),
-                 "Incorrect footer length");
-
-    const auto buffer = source->host_read(len - ender->footer_len - ender_len, ender->footer_len);
-    CompactProtocolReader cp(buffer->data(), ender->footer_len);
-    CUDF_EXPECTS(cp.read(this), "Cannot parse metadata");
-    CUDF_EXPECTS(cp.InitSchema(this), "Cannot initialize schema");
-  }
-};
-
-class aggregate_reader_metadata {
-  std::vector<metadata> per_file_metadata;
-  std::vector<std::unordered_map<std::string, std::string>> keyval_maps;
-  size_type num_rows;
-  size_type num_row_groups;
-  /**
-   * @brief Create a metadata object from each element in the source vector
-   */
-  auto metadatas_from_sources(std::vector<std::unique_ptr<datasource>> const& sources)
-  {
-    std::vector<metadata> metadatas;
-    std::transform(
-      sources.cbegin(), sources.cend(), std::back_inserter(metadatas), [](auto const& source) {
-        return metadata(source.get());
-      });
-    return metadatas;
-  }
-
-  /**
-   * @brief Collect the keyvalue maps from each per-file metadata object into a vector of maps.
-   */
-  [[nodiscard]] auto collect_keyval_metadata()
-  {
-    std::vector<std::unordered_map<std::string, std::string>> kv_maps;
-    std::transform(per_file_metadata.cbegin(),
-                   per_file_metadata.cend(),
-                   std::back_inserter(kv_maps),
-                   [](auto const& pfm) {
-                     std::unordered_map<std::string, std::string> kv_map;
-                     std::transform(pfm.key_value_metadata.cbegin(),
-                                    pfm.key_value_metadata.cend(),
-                                    std::inserter(kv_map, kv_map.end()),
-                                    [](auto const& kv) {
-                                      return std::pair{kv.key, kv.value};
-                                    });
-                     return kv_map;
-                   });
-
-    return kv_maps;
-  }
-
-  /**
-   * @brief Sums up the number of rows of each source
-   */
-  [[nodiscard]] size_type calc_num_rows() const
-  {
-    return std::accumulate(
-      per_file_metadata.begin(), per_file_metadata.end(), 0, [](auto& sum, auto& pfm) {
-        return sum + pfm.num_rows;
-      });
-  }
-
-  /**
-   * @brief Sums up the number of row groups of each source
-   */
-  [[nodiscard]] size_type calc_num_row_groups() const
-  {
-    return std::accumulate(
-      per_file_metadata.begin(), per_file_metadata.end(), 0, [](auto& sum, auto& pfm) {
-        return sum + pfm.row_groups.size();
-      });
-  }
-
- public:
-  aggregate_reader_metadata(std::vector<std::unique_ptr<datasource>> const& sources)
-    : per_file_metadata(metadatas_from_sources(sources)),
-      keyval_maps(collect_keyval_metadata()),
-      num_rows(calc_num_rows()),
-      num_row_groups(calc_num_row_groups())
-  {
-    // Verify that the input files have matching numbers of columns
-    size_type num_cols = -1;
-    for (auto const& pfm : per_file_metadata) {
-      if (pfm.row_groups.size() != 0) {
-        if (num_cols == -1)
-          num_cols = pfm.row_groups[0].columns.size();
-        else
-          CUDF_EXPECTS(num_cols == static_cast<size_type>(pfm.row_groups[0].columns.size()),
-                       "All sources must have the same number of columns");
-      }
-    }
-    // Verify that the input files have matching schemas
-    for (auto const& pfm : per_file_metadata) {
-      CUDF_EXPECTS(per_file_metadata[0].schema == pfm.schema,
-                   "All sources must have the same schemas");
-    }
-  }
-
-  [[nodiscard]] auto const& get_row_group(size_type row_group_index, size_type src_idx) const
-  {
-    CUDF_EXPECTS(src_idx >= 0 && src_idx < static_cast<size_type>(per_file_metadata.size()),
-                 "invalid source index");
-    return per_file_metadata[src_idx].row_groups[row_group_index];
-  }
-
-  [[nodiscard]] auto const& get_column_metadata(size_type row_group_index,
-                                                size_type src_idx,
-                                                int schema_idx) const
-  {
-    auto col = std::find_if(
-      per_file_metadata[src_idx].row_groups[row_group_index].columns.begin(),
-      per_file_metadata[src_idx].row_groups[row_group_index].columns.end(),
-      [schema_idx](ColumnChunk const& col) { return col.schema_idx == schema_idx ? true : false; });
-    CUDF_EXPECTS(col != std::end(per_file_metadata[src_idx].row_groups[row_group_index].columns),
-                 "Found no metadata for schema index");
-    return col->meta_data;
-  }
-
-  [[nodiscard]] auto get_num_rows() const { return num_rows; }
-
-  [[nodiscard]] auto get_num_row_groups() const { return num_row_groups; }
-
-  [[nodiscard]] auto const& get_schema(int schema_idx) const
-  {
-    return per_file_metadata[0].schema[schema_idx];
-  }
-
-  [[nodiscard]] auto const& get_key_value_metadata() const { return keyval_maps; }
-
-  /**
-   * @brief Gets the concrete nesting depth of output cudf columns
-   *
-   * @param schema_index Schema index of the input column
-   *
-   * @return comma-separated index column names in quotes
-   */
-  [[nodiscard]] inline int get_output_nesting_depth(int schema_index) const
-  {
-    auto& pfm = per_file_metadata[0];
-    int depth = 0;
-
-    // walk upwards, skipping repeated fields
-    while (schema_index > 0) {
-      if (!pfm.schema[schema_index].is_stub()) { depth++; }
-      // schema of one-level encoding list doesn't contain nesting information, so we need to
-      // manually add an extra nesting level
-      if (pfm.schema[schema_index].is_one_level_list()) { depth++; }
-      schema_index = pfm.schema[schema_index].parent_idx;
-    }
-    return depth;
-  }
-
-  /**
-   * @brief Extracts the pandas "index_columns" section
-   *
-   * PANDAS adds its own metadata to the key_value section when writing out the
-   * dataframe to a file to aid in exact reconstruction. The JSON-formatted
-   * metadata contains the index column(s) and PANDA-specific datatypes.
-   *
-   * @return comma-separated index column names in quotes
-   */
-  [[nodiscard]] std::string get_pandas_index() const
-  {
-    // Assumes that all input files have the same metadata
-    // TODO: verify this assumption
-    auto it = keyval_maps[0].find("pandas");
-    if (it != keyval_maps[0].end()) {
-      // Captures a list of quoted strings found inside square brackets after `"index_columns":`
-      // Inside quotes supports newlines, brackets, escaped quotes, etc.
-      // One-liner regex:
-      // "index_columns"\s*:\s*\[\s*((?:"(?:|(?:.*?(?![^\\]")).?)[^\\]?",?\s*)*)\]
-      // Documented below.
-      std::regex index_columns_expr{
-        R"("index_columns"\s*:\s*\[\s*)"  // match preamble, opening square bracket, whitespace
-        R"(()"                            // Open first capturing group
-        R"((?:")"                         // Open non-capturing group match opening quote
-        R"((?:|(?:.*?(?![^\\]")).?))"     // match empty string or anything between quotes
-        R"([^\\]?")"                      // Match closing non-escaped quote
-        R"(,?\s*)"                        // Match optional comma and whitespace
-        R"()*)"                           // Close non-capturing group and repeat 0 or more times
-        R"())"                            // Close first capturing group
-        R"(\])"                           // Match closing square brackets
-      };
-      std::smatch sm;
-      if (std::regex_search(it->second, sm, index_columns_expr)) { return sm[1].str(); }
-    }
-    return "";
-  }
-
-  /**
-   * @brief Extracts the column name(s) used for the row indexes in a dataframe
-   *
-   * @param names List of column names to load, where index column name(s) will be added
-   */
-  [[nodiscard]] std::vector<std::string> get_pandas_index_names() const
-  {
-    std::vector<std::string> names;
-    auto str = get_pandas_index();
-    if (str.length() != 0) {
-      std::regex index_name_expr{R"(\"((?:\\.|[^\"])*)\")"};
-      std::smatch sm;
-      while (std::regex_search(str, sm, index_name_expr)) {
-        if (sm.size() == 2) {  // 2 = whole match, first item
-          if (std::find(names.begin(), names.end(), sm[1].str()) == names.end()) {
-            std::regex esc_quote{R"(\\")"};
-            names.emplace_back(std::regex_replace(sm[1].str(), esc_quote, R"(")"));
-          }
-        }
-        str = sm.suffix();
-      }
-    }
-    return names;
-  }
-
-  struct row_group_info {
-    size_type const index;
-    size_t const start_row;  // TODO source index
-    size_type const source_index;
-    row_group_info(size_type index, size_t start_row, size_type source_index)
-      : index(index), start_row(start_row), source_index(source_index)
-    {
-    }
-  };
-
-  /**
-   * @brief Filters and reduces down to a selection of row groups
-   *
-   * @param row_groups Lists of row groups to read, one per source
-   * @param row_start Starting row of the selection
-   * @param row_count Total number of rows selected
-   *
-   * @return List of row group indexes and its starting row
-   */
-  [[nodiscard]] auto select_row_groups(std::vector<std::vector<size_type>> const& row_groups,
-                                       size_type& row_start,
-                                       size_type& row_count) const
-  {
-    if (!row_groups.empty()) {
-      std::vector<row_group_info> selection;
-      CUDF_EXPECTS(row_groups.size() == per_file_metadata.size(),
-                   "Must specify row groups for each source");
-
-      row_count = 0;
-      for (size_t src_idx = 0; src_idx < row_groups.size(); ++src_idx) {
-        for (auto const& rowgroup_idx : row_groups[src_idx]) {
-          CUDF_EXPECTS(
-            rowgroup_idx >= 0 &&
-              rowgroup_idx < static_cast<size_type>(per_file_metadata[src_idx].row_groups.size()),
-            "Invalid rowgroup index");
-          selection.emplace_back(rowgroup_idx, row_count, src_idx);
-          row_count += get_row_group(rowgroup_idx, src_idx).num_rows;
-        }
-      }
-      return selection;
-    }
-
-    row_start = std::max(row_start, 0);
-    if (row_count < 0) {
-      row_count = static_cast<size_type>(
-        std::min<int64_t>(get_num_rows(), std::numeric_limits<size_type>::max()));
-    }
-    row_count = min(row_count, get_num_rows() - row_start);
-    CUDF_EXPECTS(row_count >= 0, "Invalid row count");
-    CUDF_EXPECTS(row_start <= get_num_rows(), "Invalid row start");
-
-    std::vector<row_group_info> selection;
-    size_type count = 0;
-    for (size_t src_idx = 0; src_idx < per_file_metadata.size(); ++src_idx) {
-      for (size_t rg_idx = 0; rg_idx < per_file_metadata[src_idx].row_groups.size(); ++rg_idx) {
-        auto const chunk_start_row = count;
-        count += get_row_group(rg_idx, src_idx).num_rows;
-        if (count > row_start || count == 0) {
-          selection.emplace_back(rg_idx, chunk_start_row, src_idx);
-        }
-        if (count >= row_start + row_count) { break; }
-      }
-    }
-
-    return selection;
-  }
-
-  /**
-   * @brief Filters and reduces down to a selection of columns
-   *
-   * @param use_names List of paths of column names to select; `nullopt` if user did not select
-   * columns to read
-   * @param include_index Whether to always include the PANDAS index column(s)
-   * @param strings_to_categorical Type conversion parameter
-   * @param timestamp_type_id Type conversion parameter
-   *
-   * @return input column information, output column information, list of output column schema
-   * indices
-   */
-  [[nodiscard]] auto select_columns(std::optional<std::vector<std::string>> const& use_names,
-                                    bool include_index,
-                                    bool strings_to_categorical,
-                                    type_id timestamp_type_id) const
-  {
-    auto find_schema_child = [&](SchemaElement const& schema_elem, std::string const& name) {
-      auto const& col_schema_idx = std::find_if(
-        schema_elem.children_idx.cbegin(),
-        schema_elem.children_idx.cend(),
-        [&](size_t col_schema_idx) { return get_schema(col_schema_idx).name == name; });
-
-      return (col_schema_idx != schema_elem.children_idx.end()) ? static_cast<int>(*col_schema_idx)
-                                                                : -1;
-    };
-
-    std::vector<column_buffer> output_columns;
-    std::vector<input_column_info> input_columns;
-    std::vector<int> nesting;
-
-    // Return true if column path is valid. e.g. if the path is {"struct1", "child1"}, then it is
-    // valid if "struct1.child1" exists in this file's schema. If "struct1" exists but "child1" is
-    // not a child of "struct1" then the function will return false for "struct1"
-    std::function<bool(column_name_info const*, int, std::vector<column_buffer>&, bool)>
-      build_column = [&](column_name_info const* col_name_info,
-                         int schema_idx,
-                         std::vector<column_buffer>& out_col_array,
-                         bool has_list_parent) {
-        if (schema_idx < 0) { return false; }
-        auto const& schema_elem = get_schema(schema_idx);
-
-        // if schema_elem is a stub then it does not exist in the column_name_info and column_buffer
-        // hierarchy. So continue on
-        if (schema_elem.is_stub()) {
-          // is this legit?
-          CUDF_EXPECTS(schema_elem.num_children == 1, "Unexpected number of children for stub");
-          auto child_col_name_info = (col_name_info) ? &col_name_info->children[0] : nullptr;
-          return build_column(
-            child_col_name_info, schema_elem.children_idx[0], out_col_array, has_list_parent);
-        }
-
-        // if we're at the root, this is a new output column
-        auto const col_type =
-          schema_elem.is_one_level_list()
-            ? type_id::LIST
-            : to_type_id(schema_elem, strings_to_categorical, timestamp_type_id);
-        auto const dtype = to_data_type(col_type, schema_elem);
-
-        column_buffer output_col(dtype, schema_elem.repetition_type == OPTIONAL);
-        if (has_list_parent) { output_col.user_data |= PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT; }
-        // store the index of this element if inserted in out_col_array
-        nesting.push_back(static_cast<int>(out_col_array.size()));
-        output_col.name = schema_elem.name;
-
-        // build each child
-        bool path_is_valid = false;
-        if (col_name_info == nullptr or col_name_info->children.empty()) {
-          // add all children of schema_elem.
-          // At this point, we can no longer pass a col_name_info to build_column
-          for (int idx = 0; idx < schema_elem.num_children; idx++) {
-            path_is_valid |= build_column(nullptr,
-                                          schema_elem.children_idx[idx],
-                                          output_col.children,
-                                          has_list_parent || col_type == type_id::LIST);
-          }
-        } else {
-          for (size_t idx = 0; idx < col_name_info->children.size(); idx++) {
-            path_is_valid |=
-              build_column(&col_name_info->children[idx],
-                           find_schema_child(schema_elem, col_name_info->children[idx].name),
-                           output_col.children,
-                           has_list_parent || col_type == type_id::LIST);
-          }
-        }
-
-        // if I have no children, we're at a leaf and I'm an input column (that is, one with actual
-        // data stored) so add me to the list.
-        if (schema_elem.num_children == 0) {
-          input_column_info& input_col = input_columns.emplace_back(
-            input_column_info{schema_idx, schema_elem.name, schema_elem.max_repetition_level > 0});
-
-          // set up child output column for one-level encoding list
-          if (schema_elem.is_one_level_list()) {
-            // determine the element data type
-            auto const element_type =
-              to_type_id(schema_elem, strings_to_categorical, timestamp_type_id);
-            auto const element_dtype = to_data_type(element_type, schema_elem);
-
-            column_buffer element_col(element_dtype, schema_elem.repetition_type == OPTIONAL);
-            if (has_list_parent || col_type == type_id::LIST) {
-              element_col.user_data |= PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT;
-            }
-            // store the index of this element
-            nesting.push_back(static_cast<int>(output_col.children.size()));
-            // TODO: not sure if we should assign a name or leave it blank
-            element_col.name = "element";
-
-            output_col.children.push_back(std::move(element_col));
-          }
-
-          std::copy(nesting.cbegin(), nesting.cend(), std::back_inserter(input_col.nesting));
-
-          // pop off the extra nesting element.
-          if (schema_elem.is_one_level_list()) { nesting.pop_back(); }
-
-          path_is_valid = true;  // If we're able to reach leaf then path is valid
-        }
-
-        if (path_is_valid) { out_col_array.push_back(std::move(output_col)); }
-
-        nesting.pop_back();
-        return path_is_valid;
-      };
-
-    std::vector<int> output_column_schemas;
-
-    //
-    // there is not necessarily a 1:1 mapping between input columns and output columns.
-    // For example, parquet does not explicitly store a ColumnChunkDesc for struct columns.
-    // The "structiness" is simply implied by the schema.  For example, this schema:
-    //  required group field_id=1 name {
-    //    required binary field_id=2 firstname (String);
-    //    required binary field_id=3 middlename (String);
-    //    required binary field_id=4 lastname (String);
-    // }
-    // will only contain 3 internal columns of data (firstname, middlename, lastname).  But of
-    // course "name" is ultimately the struct column we want to return.
-    //
-    // "firstname", "middlename" and "lastname" represent the input columns in the file that we
-    // process to produce the final cudf "name" column.
-    //
-    // A user can ask for a single field out of the struct e.g. firstname.
-    // In this case they'll pass a fully qualified name to the schema element like
-    // ["name", "firstname"]
-    //
-    auto const& root = get_schema(0);
-    if (not use_names.has_value()) {
-      for (auto const& schema_idx : root.children_idx) {
-        build_column(nullptr, schema_idx, output_columns, false);
-        output_column_schemas.push_back(schema_idx);
-      }
-    } else {
-      struct path_info {
-        std::string full_path;
-        int schema_idx;
-      };
-
-      // Convert schema into a vector of every possible path
-      std::vector<path_info> all_paths;
-      std::function<void(std::string, int)> add_path = [&](std::string path_till_now,
-                                                           int schema_idx) {
-        auto const& schema_elem = get_schema(schema_idx);
-        std::string curr_path   = path_till_now + schema_elem.name;
-        all_paths.push_back({curr_path, schema_idx});
-        for (auto const& child_idx : schema_elem.children_idx) {
-          add_path(curr_path + ".", child_idx);
-        }
-      };
-      for (auto const& child_idx : get_schema(0).children_idx) {
-        add_path("", child_idx);
-      }
-
-      // Find which of the selected paths are valid and get their schema index
-      std::vector<path_info> valid_selected_paths;
-      for (auto const& selected_path : *use_names) {
-        auto found_path =
-          std::find_if(all_paths.begin(), all_paths.end(), [&](path_info& valid_path) {
-            return valid_path.full_path == selected_path;
-          });
-        if (found_path != all_paths.end()) {
-          valid_selected_paths.push_back({selected_path, found_path->schema_idx});
-        }
-      }
-
-      // Now construct paths as vector of strings for further consumption
-      std::vector<std::vector<std::string>> use_names3;
-      std::transform(valid_selected_paths.begin(),
-                     valid_selected_paths.end(),
-                     std::back_inserter(use_names3),
-                     [&](path_info const& valid_path) {
-                       auto schema_idx = valid_path.schema_idx;
-                       std::vector<std::string> result_path;
-                       do {
-                         SchemaElement const& elem = get_schema(schema_idx);
-                         result_path.push_back(elem.name);
-                         schema_idx = elem.parent_idx;
-                       } while (schema_idx > 0);
-                       return std::vector<std::string>(result_path.rbegin(), result_path.rend());
-                     });
-
-      std::vector<column_name_info> selected_columns;
-      if (include_index) {
-        std::vector<std::string> index_names = get_pandas_index_names();
-        std::transform(index_names.cbegin(),
-                       index_names.cend(),
-                       std::back_inserter(selected_columns),
-                       [](std::string const& name) { return column_name_info(name); });
-      }
-      // Merge the vector use_names into a set of hierarchical column_name_info objects
-      /* This is because if we have columns like this:
-       *     col1
-       *      / \
-       *    s3   f4
-       *   / \
-       * f5   f6
-       *
-       * there may be common paths in use_names like:
-       * {"col1", "s3", "f5"}, {"col1", "f4"}
-       * which means we want the output to contain
-       *     col1
-       *      / \
-       *    s3   f4
-       *   /
-       * f5
-       *
-       * rather than
-       *  col1   col1
-       *   |      |
-       *   s3     f4
-       *   |
-       *   f5
-       */
-      for (auto const& path : use_names3) {
-        auto array_to_find_in = &selected_columns;
-        for (size_t depth = 0; depth < path.size(); ++depth) {
-          // Check if the path exists in our selected_columns and if not, add it.
-          auto const& name_to_find = path[depth];
-          auto found_col           = std::find_if(
-            array_to_find_in->begin(),
-            array_to_find_in->end(),
-            [&name_to_find](column_name_info const& col) { return col.name == name_to_find; });
-          if (found_col == array_to_find_in->end()) {
-            auto& col        = array_to_find_in->emplace_back(name_to_find);
-            array_to_find_in = &col.children;
-          } else {
-            // Path exists. go down further.
-            array_to_find_in = &found_col->children;
-          }
-        }
-      }
-      for (auto& col : selected_columns) {
-        auto const& top_level_col_schema_idx = find_schema_child(root, col.name);
-        bool valid_column = build_column(&col, top_level_col_schema_idx, output_columns, false);
-        if (valid_column) output_column_schemas.push_back(top_level_col_schema_idx);
-      }
-    }
-
-    return std::make_tuple(
-      std::move(input_columns), std::move(output_columns), std::move(output_column_schemas));
-  }
-};
-
-/**
- * @brief Generate depth remappings for repetition and definition levels.
- *
- * When dealing with columns that contain lists, we must examine incoming
- * repetition and definition level pairs to determine what range of output nesting
- * is indicated when adding new values.  This function generates the mappings of
- * the R/D levels to those start/end bounds
- *
- * @param remap Maps column schema index to the R/D remapping vectors for that column
- * @param src_col_schema The column schema to generate the new mapping for
- * @param md File metadata information
- */
-void generate_depth_remappings(std::map<int, std::pair<std::vector<int>, std::vector<int>>>& remap,
-                               int src_col_schema,
-                               aggregate_reader_metadata const& md)
-{
-  // already generated for this level
-  if (remap.find(src_col_schema) != remap.end()) { return; }
-  auto schema   = md.get_schema(src_col_schema);
-  int max_depth = md.get_output_nesting_depth(src_col_schema);
-
-  CUDF_EXPECTS(remap.find(src_col_schema) == remap.end(),
-               "Attempting to remap a schema more than once");
-  auto inserted =
-    remap.insert(std::pair<int, std::pair<std::vector<int>, std::vector<int>>>{src_col_schema, {}});
-  auto& depth_remap = inserted.first->second;
-
-  std::vector<int>& rep_depth_remap = (depth_remap.first);
-  rep_depth_remap.resize(schema.max_repetition_level + 1);
-  std::vector<int>& def_depth_remap = (depth_remap.second);
-  def_depth_remap.resize(schema.max_definition_level + 1);
-
-  // the key:
-  // for incoming level values  R/D
-  // add values starting at the shallowest nesting level X has repetition level R
-  // until you reach the deepest nesting level Y that corresponds to the repetition level R1
-  // held by the nesting level that has definition level D
-  //
-  // Example: a 3 level struct with a list at the bottom
-  //
-  //                     R / D   Depth
-  // level0              0 / 1     0
-  //   level1            0 / 2     1
-  //     level2          0 / 3     2
-  //       list          0 / 3     3
-  //         element     1 / 4     4
-  //
-  // incoming R/D : 0, 0  -> add values from depth 0 to 3   (def level 0 always maps to depth 0)
-  // incoming R/D : 0, 1  -> add values from depth 0 to 3
-  // incoming R/D : 0, 2  -> add values from depth 0 to 3
-  // incoming R/D : 1, 4  -> add values from depth 4 to 4
-  //
-  // Note : the -validity- of values is simply checked by comparing the incoming D value against the
-  // D value of the given nesting level (incoming D >= the D for the nesting level == valid,
-  // otherwise NULL).  The tricky part is determining what nesting levels to add values at.
-  //
-  // For schemas with no repetition level (no lists), X is always 0 and Y is always max nesting
-  // depth.
-  //
-
-  // compute "X" from above
-  for (int s_idx = schema.max_repetition_level; s_idx >= 0; s_idx--) {
-    auto find_shallowest = [&](int r) {
-      int shallowest = -1;
-      int cur_depth  = max_depth - 1;
-      int schema_idx = src_col_schema;
-      while (schema_idx > 0) {
-        auto cur_schema = md.get_schema(schema_idx);
-        if (cur_schema.max_repetition_level == r) {
-          // if this is a repeated field, map it one level deeper
-          shallowest = cur_schema.is_stub() ? cur_depth + 1 : cur_depth;
-        }
-        // if it's one-level encoding list
-        else if (cur_schema.is_one_level_list()) {
-          shallowest = cur_depth - 1;
-        }
-        if (!cur_schema.is_stub()) { cur_depth--; }
-        schema_idx = cur_schema.parent_idx;
-      }
-      return shallowest;
-    };
-    rep_depth_remap[s_idx] = find_shallowest(s_idx);
-  }
-
-  // compute "Y" from above
-  for (int s_idx = schema.max_definition_level; s_idx >= 0; s_idx--) {
-    auto find_deepest = [&](int d) {
-      SchemaElement prev_schema;
-      int schema_idx = src_col_schema;
-      int r1         = 0;
-      while (schema_idx > 0) {
-        SchemaElement cur_schema = md.get_schema(schema_idx);
-        if (cur_schema.max_definition_level == d) {
-          // if this is a repeated field, map it one level deeper
-          r1 = cur_schema.is_stub() ? prev_schema.max_repetition_level
-                                    : cur_schema.max_repetition_level;
-          break;
-        }
-        prev_schema = cur_schema;
-        schema_idx  = cur_schema.parent_idx;
-      }
-
-      // we now know R1 from above. return the deepest nesting level that has the
-      // same repetition level
-      schema_idx = src_col_schema;
-      int depth  = max_depth - 1;
-      while (schema_idx > 0) {
-        SchemaElement cur_schema = md.get_schema(schema_idx);
-        if (cur_schema.max_repetition_level == r1) {
-          // if this is a repeated field, map it one level deeper
-          depth = cur_schema.is_stub() ? depth + 1 : depth;
-          break;
-        }
-        if (!cur_schema.is_stub()) { depth--; }
-        prev_schema = cur_schema;
-        schema_idx  = cur_schema.parent_idx;
-      }
-      return depth;
-    };
-    def_depth_remap[s_idx] = find_deepest(s_idx);
-  }
-}
-
-/**
- * @copydoc cudf::io::detail::parquet::read_column_chunks
- */
-std::future<void> reader::impl::read_column_chunks(
-  std::vector<std::unique_ptr<datasource::buffer>>& page_data,
-  hostdevice_vector<gpu::ColumnChunkDesc>& chunks,  // TODO const?
-  size_t begin_chunk,
-  size_t end_chunk,
-  const std::vector<size_t>& column_chunk_offsets,
-  std::vector<size_type> const& chunk_source_map)
-{
-  // Transfer chunk data, coalescing adjacent chunks
-  std::vector<std::future<size_t>> read_tasks;
-  for (size_t chunk = begin_chunk; chunk < end_chunk;) {
-    const size_t io_offset   = column_chunk_offsets[chunk];
-    size_t io_size           = chunks[chunk].compressed_size;
-    size_t next_chunk        = chunk + 1;
-    const bool is_compressed = (chunks[chunk].codec != parquet::Compression::UNCOMPRESSED);
-    while (next_chunk < end_chunk) {
-      const size_t next_offset = column_chunk_offsets[next_chunk];
-      const bool is_next_compressed =
-        (chunks[next_chunk].codec != parquet::Compression::UNCOMPRESSED);
-      if (next_offset != io_offset + io_size || is_next_compressed != is_compressed) {
-        // Can't merge if not contiguous or mixing compressed and uncompressed
-        // Not coalescing uncompressed with compressed chunks is so that compressed buffers can be
-        // freed earlier (immediately after decompression stage) to limit peak memory requirements
-        break;
-      }
-      io_size += chunks[next_chunk].compressed_size;
-      next_chunk++;
-    }
-    if (io_size != 0) {
-      auto& source = _sources[chunk_source_map[chunk]];
-      if (source->is_device_read_preferred(io_size)) {
-        auto buffer        = rmm::device_buffer(io_size, _stream);
-        auto fut_read_size = source->device_read_async(
-          io_offset, io_size, static_cast<uint8_t*>(buffer.data()), _stream);
-        read_tasks.emplace_back(std::move(fut_read_size));
-        page_data[chunk] = datasource::buffer::create(std::move(buffer));
-      } else {
-        auto const buffer = source->host_read(io_offset, io_size);
-        page_data[chunk] =
-          datasource::buffer::create(rmm::device_buffer(buffer->data(), buffer->size(), _stream));
-      }
-      auto d_compdata = page_data[chunk]->data();
-      do {
-        chunks[chunk].compressed_data = d_compdata;
-        d_compdata += chunks[chunk].compressed_size;
-      } while (++chunk != next_chunk);
-    } else {
-      chunk = next_chunk;
-    }
-  }
-  auto sync_fn = [](decltype(read_tasks) read_tasks) {
-    for (auto& task : read_tasks) {
-      task.wait();
-    }
-  };
-  return std::async(std::launch::deferred, sync_fn, std::move(read_tasks));
-}
-
-/**
- * @copydoc cudf::io::detail::parquet::count_page_headers
- */
-size_t reader::impl::count_page_headers(hostdevice_vector<gpu::ColumnChunkDesc>& chunks)
-{
-  size_t total_pages = 0;
-
-  chunks.host_to_device(_stream);
-  gpu::DecodePageHeaders(chunks.device_ptr(), chunks.size(), _stream);
-  chunks.device_to_host(_stream, true);
-
-  for (size_t c = 0; c < chunks.size(); c++) {
-    total_pages += chunks[c].num_data_pages + chunks[c].num_dict_pages;
-  }
-
-  return total_pages;
-}
-
-/**
- * @copydoc cudf::io::detail::parquet::decode_page_headers
- */
-void reader::impl::decode_page_headers(hostdevice_vector<gpu::ColumnChunkDesc>& chunks,
-                                       hostdevice_vector<gpu::PageInfo>& pages)
-{
-  // IMPORTANT : if you change how pages are stored within a chunk (dist pages, then data pages),
-  // please update preprocess_nested_columns to reflect this.
-  for (size_t c = 0, page_count = 0; c < chunks.size(); c++) {
-    chunks[c].max_num_pages = chunks[c].num_data_pages + chunks[c].num_dict_pages;
-    chunks[c].page_info     = pages.device_ptr(page_count);
-    page_count += chunks[c].max_num_pages;
-  }
-
-  chunks.host_to_device(_stream);
-  gpu::DecodePageHeaders(chunks.device_ptr(), chunks.size(), _stream);
-  pages.device_to_host(_stream, true);
-}
-
-/**
- * @copydoc cudf::io::detail::parquet::decompress_page_data
- */
-rmm::device_buffer reader::impl::decompress_page_data(
-  hostdevice_vector<gpu::ColumnChunkDesc>& chunks, hostdevice_vector<gpu::PageInfo>& pages)
-{
-  auto for_each_codec_page = [&](parquet::Compression codec, const std::function<void(size_t)>& f) {
-    for (size_t c = 0, page_count = 0; c < chunks.size(); c++) {
-      const auto page_stride = chunks[c].max_num_pages;
-      if (chunks[c].codec == codec) {
-        for (int k = 0; k < page_stride; k++) {
-          f(page_count + k);
-        }
-      }
-      page_count += page_stride;
-    }
-  };
-
-  // Brotli scratch memory for decompressing
-  rmm::device_buffer debrotli_scratch;
-
-  // Count the exact number of compressed pages
-  size_t num_comp_pages    = 0;
-  size_t total_decomp_size = 0;
-
-  struct codec_stats {
-    parquet::Compression compression_type = UNCOMPRESSED;
-    size_t num_pages                      = 0;
-    int32_t max_decompressed_size         = 0;
-    size_t total_decomp_size              = 0;
-  };
-
-  std::array codecs{codec_stats{parquet::GZIP},
-                    codec_stats{parquet::SNAPPY},
-                    codec_stats{parquet::BROTLI},
-                    codec_stats{parquet::ZSTD}};
-
-  auto is_codec_supported = [&codecs](int8_t codec) {
-    if (codec == parquet::UNCOMPRESSED) return true;
-    return std::find_if(codecs.begin(), codecs.end(), [codec](auto& cstats) {
-             return codec == cstats.compression_type;
-           }) != codecs.end();
-  };
-  CUDF_EXPECTS(std::all_of(chunks.begin(),
-                           chunks.end(),
-                           [&is_codec_supported](auto const& chunk) {
-                             return is_codec_supported(chunk.codec);
-                           }),
-               "Unsupported compression type");
-
-  for (auto& codec : codecs) {
-    for_each_codec_page(codec.compression_type, [&](size_t page) {
-      auto page_uncomp_size = pages[page].uncompressed_page_size;
-      total_decomp_size += page_uncomp_size;
-      codec.total_decomp_size += page_uncomp_size;
-      codec.max_decompressed_size = std::max(codec.max_decompressed_size, page_uncomp_size);
-      codec.num_pages++;
-      num_comp_pages++;
-    });
-    if (codec.compression_type == parquet::BROTLI && codec.num_pages > 0) {
-      debrotli_scratch.resize(get_gpu_debrotli_scratch_size(codec.num_pages), _stream);
-    }
-  }
-
-  // Dispatch batches of pages to decompress for each codec
-  rmm::device_buffer decomp_pages(total_decomp_size, _stream);
-
-  std::vector<device_span<uint8_t const>> comp_in;
-  comp_in.reserve(num_comp_pages);
-  std::vector<device_span<uint8_t>> comp_out;
-  comp_out.reserve(num_comp_pages);
-
-  // vectors to save v2 def and rep level data, if any
-  std::vector<device_span<uint8_t const>> copy_in;
-  copy_in.reserve(num_comp_pages);
-  std::vector<device_span<uint8_t>> copy_out;
-  copy_out.reserve(num_comp_pages);
-
-  rmm::device_uvector<compression_result> comp_res(num_comp_pages, _stream);
-  thrust::fill(rmm::exec_policy(_stream),
-               comp_res.begin(),
-               comp_res.end(),
-               compression_result{0, compression_status::FAILURE});
-
-  size_t decomp_offset = 0;
-  int32_t start_pos    = 0;
-  for (const auto& codec : codecs) {
-    if (codec.num_pages == 0) { continue; }
-
-    for_each_codec_page(codec.compression_type, [&](size_t page_idx) {
-      auto const dst_base = static_cast<uint8_t*>(decomp_pages.data()) + decomp_offset;
-      auto& page          = pages[page_idx];
-      // offset will only be non-zero for V2 pages
-      auto const offset = page.def_lvl_bytes + page.rep_lvl_bytes;
-      // for V2 need to copy def and rep level info into place, and then offset the
-      // input and output buffers. otherwise we'd have to keep both the compressed
-      // and decompressed data.
-      if (offset != 0) {
-        copy_in.emplace_back(page.page_data, offset);
-        copy_out.emplace_back(dst_base, offset);
-      }
-      comp_in.emplace_back(page.page_data + offset,
-                           static_cast<size_t>(page.compressed_page_size - offset));
-      comp_out.emplace_back(dst_base + offset,
-                            static_cast<size_t>(page.uncompressed_page_size - offset));
-      page.page_data = dst_base;
-      decomp_offset += page.uncompressed_page_size;
-    });
-
-    host_span<device_span<uint8_t const> const> comp_in_view{comp_in.data() + start_pos,
-                                                             codec.num_pages};
-    auto const d_comp_in = cudf::detail::make_device_uvector_async(comp_in_view, _stream);
-    host_span<device_span<uint8_t> const> comp_out_view(comp_out.data() + start_pos,
-                                                        codec.num_pages);
-    auto const d_comp_out = cudf::detail::make_device_uvector_async(comp_out_view, _stream);
-    device_span<compression_result> d_comp_res_view(comp_res.data() + start_pos, codec.num_pages);
-
-    switch (codec.compression_type) {
-      case parquet::GZIP:
-        gpuinflate(d_comp_in, d_comp_out, d_comp_res_view, gzip_header_included::YES, _stream);
-        break;
-      case parquet::SNAPPY:
-        if (nvcomp_integration::is_stable_enabled()) {
-          nvcomp::batched_decompress(nvcomp::compression_type::SNAPPY,
-                                     d_comp_in,
-                                     d_comp_out,
-                                     d_comp_res_view,
-                                     codec.max_decompressed_size,
-                                     codec.total_decomp_size,
-                                     _stream);
-        } else {
-          gpu_unsnap(d_comp_in, d_comp_out, d_comp_res_view, _stream);
-        }
-        break;
-      case parquet::ZSTD:
-        nvcomp::batched_decompress(nvcomp::compression_type::ZSTD,
-                                   d_comp_in,
-                                   d_comp_out,
-                                   d_comp_res_view,
-                                   codec.max_decompressed_size,
-                                   codec.total_decomp_size,
-                                   _stream);
-        break;
-      case parquet::BROTLI:
-        gpu_debrotli(d_comp_in,
-                     d_comp_out,
-                     d_comp_res_view,
-                     debrotli_scratch.data(),
-                     debrotli_scratch.size(),
-                     _stream);
-        break;
-      default: CUDF_FAIL("Unexpected decompression dispatch"); break;
-    }
-    start_pos += codec.num_pages;
-  }
-
-  decompress_check(comp_res, _stream);
-
-  // now copy the uncompressed V2 def and rep level data
-  if (not copy_in.empty()) {
-    auto const d_copy_in  = cudf::detail::make_device_uvector_async(copy_in, _stream);
-    auto const d_copy_out = cudf::detail::make_device_uvector_async(copy_out, _stream);
-
-    gpu_copy_uncompressed_blocks(d_copy_in, d_copy_out, _stream);
-    _stream.synchronize();
-  }
-
-  // Update the page information in device memory with the updated value of
-  // page_data; it now points to the uncompressed data buffer
-  pages.host_to_device(_stream);
-
-  return decomp_pages;
-}
-
-/**
- * @copydoc cudf::io::detail::parquet::allocate_nesting_info
- */
-void reader::impl::allocate_nesting_info(hostdevice_vector<gpu::ColumnChunkDesc> const& chunks,
-                                         hostdevice_vector<gpu::PageInfo>& pages,
-                                         hostdevice_vector<gpu::PageNestingInfo>& page_nesting_info)
-{
-  // compute total # of page_nesting infos needed and allocate space. doing this in one
-  // buffer to keep it to a single gpu allocation
-  size_t const total_page_nesting_infos = std::accumulate(
-    chunks.host_ptr(), chunks.host_ptr() + chunks.size(), 0, [&](int total, auto& chunk) {
-      // the schema of the input column
-      auto const& schema                    = _metadata->get_schema(chunk.src_col_schema);
-      auto const per_page_nesting_info_size = max(
-        schema.max_definition_level + 1, _metadata->get_output_nesting_depth(chunk.src_col_schema));
-      return total + (per_page_nesting_info_size * chunk.num_data_pages);
-    });
-
-  page_nesting_info = hostdevice_vector<gpu::PageNestingInfo>{total_page_nesting_infos, _stream};
-
-  // retrieve from the gpu so we can update
-  pages.device_to_host(_stream, true);
-
-  // update pointers in the PageInfos
-  int target_page_index = 0;
-  int src_info_index    = 0;
-  for (size_t idx = 0; idx < chunks.size(); idx++) {
-    int src_col_schema                    = chunks[idx].src_col_schema;
-    auto& schema                          = _metadata->get_schema(src_col_schema);
-    auto const per_page_nesting_info_size = std::max(
-      schema.max_definition_level + 1, _metadata->get_output_nesting_depth(src_col_schema));
-
-    // skip my dict pages
-    target_page_index += chunks[idx].num_dict_pages;
-    for (int p_idx = 0; p_idx < chunks[idx].num_data_pages; p_idx++) {
-      pages[target_page_index + p_idx].nesting = page_nesting_info.device_ptr() + src_info_index;
-      pages[target_page_index + p_idx].num_nesting_levels = per_page_nesting_info_size;
-
-      src_info_index += per_page_nesting_info_size;
-    }
-    target_page_index += chunks[idx].num_data_pages;
-  }
-
-  // copy back to the gpu
-  pages.host_to_device(_stream);
-
-  // fill in
-  int nesting_info_index = 0;
-  std::map<int, std::pair<std::vector<int>, std::vector<int>>> depth_remapping;
-  for (size_t idx = 0; idx < chunks.size(); idx++) {
-    int src_col_schema = chunks[idx].src_col_schema;
-
-    // schema of the input column
-    auto& schema = _metadata->get_schema(src_col_schema);
-    // real depth of the output cudf column hierarchy (1 == no nesting, 2 == 1 level, etc)
-    int max_depth = _metadata->get_output_nesting_depth(src_col_schema);
-
-    // # of nesting infos stored per page for this column
-    auto const per_page_nesting_info_size = std::max(schema.max_definition_level + 1, max_depth);
-
-    // if this column has lists, generate depth remapping
-    std::map<int, std::pair<std::vector<int>, std::vector<int>>> depth_remapping;
-    if (schema.max_repetition_level > 0) {
-      generate_depth_remappings(depth_remapping, src_col_schema, *_metadata);
-    }
-
-    // fill in host-side nesting info
-    int schema_idx  = src_col_schema;
-    auto cur_schema = _metadata->get_schema(schema_idx);
-    int cur_depth   = max_depth - 1;
-    while (schema_idx > 0) {
-      // stub columns (basically the inner field of a list scheme element) are not real columns.
-      // we can ignore them for the purposes of output nesting info
-      if (!cur_schema.is_stub()) {
-        // initialize each page within the chunk
-        for (int p_idx = 0; p_idx < chunks[idx].num_data_pages; p_idx++) {
-          gpu::PageNestingInfo* pni =
-            &page_nesting_info[nesting_info_index + (p_idx * per_page_nesting_info_size)];
-
-          // if we have lists, set our start and end depth remappings
-          if (schema.max_repetition_level > 0) {
-            auto remap = depth_remapping.find(src_col_schema);
-            CUDF_EXPECTS(remap != depth_remapping.end(),
-                         "Could not find depth remapping for schema");
-            std::vector<int> const& rep_depth_remap = (remap->second.first);
-            std::vector<int> const& def_depth_remap = (remap->second.second);
-
-            for (size_t m = 0; m < rep_depth_remap.size(); m++) {
-              pni[m].start_depth = rep_depth_remap[m];
-            }
-            for (size_t m = 0; m < def_depth_remap.size(); m++) {
-              pni[m].end_depth = def_depth_remap[m];
-            }
-          }
-
-          // values indexed by output column index
-          pni[cur_depth].max_def_level = cur_schema.max_definition_level;
-          pni[cur_depth].max_rep_level = cur_schema.max_repetition_level;
-          pni[cur_depth].size          = 0;
-        }
-
-        // move up the hierarchy
-        cur_depth--;
-      }
-
-      // next schema
-      schema_idx = cur_schema.parent_idx;
-      cur_schema = _metadata->get_schema(schema_idx);
-    }
-
-    nesting_info_index += (per_page_nesting_info_size * chunks[idx].num_data_pages);
-  }
-
-  // copy nesting info to the device
-  page_nesting_info.host_to_device(_stream);
-}
-
-/**
- * @copydoc cudf::io::detail::parquet::preprocess_columns
- */
-void reader::impl::preprocess_columns(hostdevice_vector<gpu::ColumnChunkDesc>& chunks,
-                                      hostdevice_vector<gpu::PageInfo>& pages,
-                                      size_t min_row,
-                                      size_t total_rows,
-                                      bool uses_custom_row_bounds)
-{
-  // iterate over all input columns and allocate any associated output
-  // buffers if they are not part of a list hierarchy. mark down
-  // if we have any list columns that need further processing.
-  bool has_lists = false;
-  for (size_t idx = 0; idx < _input_columns.size(); idx++) {
-    auto const& input_col  = _input_columns[idx];
-    size_t const max_depth = input_col.nesting_depth();
-
-    auto* cols = &_output_columns;
-    for (size_t l_idx = 0; l_idx < max_depth; l_idx++) {
-      auto& out_buf = (*cols)[input_col.nesting[l_idx]];
-      cols          = &out_buf.children;
-
-      // if this has a list parent, we will have to do further work in gpu::PreprocessColumnData
-      // to know how big this buffer actually is.
-      if (out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) {
-        has_lists = true;
-      }
-      // if we haven't already processed this column because it is part of a struct hierarchy
-      else if (out_buf.size == 0) {
-        // add 1 for the offset if this is a list column
-        out_buf.create(
-          out_buf.type.id() == type_id::LIST && l_idx < max_depth ? total_rows + 1 : total_rows,
-          _stream,
-          _mr);
-      }
-    }
-  }
-
-  // if we have columns containing lists, further preprocessing is necessary.
-  if (has_lists) {
-    gpu::PreprocessColumnData(pages,
-                              chunks,
-                              _input_columns,
-                              _output_columns,
-                              total_rows,
-                              min_row,
-                              uses_custom_row_bounds,
-                              _stream,
-                              _mr);
-    _stream.synchronize();
-  }
-}
-
-/**
- * @copydoc cudf::io::detail::parquet::decode_page_data
- */
-void reader::impl::decode_page_data(hostdevice_vector<gpu::ColumnChunkDesc>& chunks,
-                                    hostdevice_vector<gpu::PageInfo>& pages,
-                                    hostdevice_vector<gpu::PageNestingInfo>& page_nesting,
-                                    size_t min_row,
-                                    size_t total_rows)
-{
-  auto is_dict_chunk = [](const gpu::ColumnChunkDesc& chunk) {
-    return (chunk.data_type & 0x7) == BYTE_ARRAY && chunk.num_dict_pages > 0;
-  };
-
-  // Count the number of string dictionary entries
-  // NOTE: Assumes first page in the chunk is always the dictionary page
-  size_t total_str_dict_indexes = 0;
-  for (size_t c = 0, page_count = 0; c < chunks.size(); c++) {
-    if (is_dict_chunk(chunks[c])) { total_str_dict_indexes += pages[page_count].num_input_values; }
-    page_count += chunks[c].max_num_pages;
-  }
-
-  // Build index for string dictionaries since they can't be indexed
-  // directly due to variable-sized elements
-  auto str_dict_index = cudf::detail::make_zeroed_device_uvector_async<string_index_pair>(
-    total_str_dict_indexes, _stream);
-
-  // TODO (dm): hd_vec should have begin and end iterator members
-  size_t sum_max_depths =
-    std::accumulate(chunks.host_ptr(),
-                    chunks.host_ptr(chunks.size()),
-                    0,
-                    [&](size_t cursum, gpu::ColumnChunkDesc const& chunk) {
-                      return cursum + _metadata->get_output_nesting_depth(chunk.src_col_schema);
-                    });
-
-  // In order to reduce the number of allocations of hostdevice_vector, we allocate a single vector
-  // to store all per-chunk pointers to nested data/nullmask. `chunk_offsets[i]` will store the
-  // offset into `chunk_nested_data`/`chunk_nested_valids` for the array of pointers for chunk `i`
-  auto chunk_nested_valids = hostdevice_vector<uint32_t*>(sum_max_depths, _stream);
-  auto chunk_nested_data   = hostdevice_vector<void*>(sum_max_depths, _stream);
-  auto chunk_offsets       = std::vector<size_t>();
-
-  // Update chunks with pointers to column data.
-  for (size_t c = 0, page_count = 0, str_ofs = 0, chunk_off = 0; c < chunks.size(); c++) {
-    input_column_info const& input_col = _input_columns[chunks[c].src_col_index];
-    CUDF_EXPECTS(input_col.schema_idx == chunks[c].src_col_schema,
-                 "Column/page schema index mismatch");
-
-    if (is_dict_chunk(chunks[c])) {
-      chunks[c].str_dict_index = str_dict_index.data() + str_ofs;
-      str_ofs += pages[page_count].num_input_values;
-    }
-
-    size_t max_depth = _metadata->get_output_nesting_depth(chunks[c].src_col_schema);
-    chunk_offsets.push_back(chunk_off);
-
-    // get a slice of size `nesting depth` from `chunk_nested_valids` to store an array of pointers
-    // to validity data
-    auto valids              = chunk_nested_valids.host_ptr(chunk_off);
-    chunks[c].valid_map_base = chunk_nested_valids.device_ptr(chunk_off);
-
-    // get a slice of size `nesting depth` from `chunk_nested_data` to store an array of pointers to
-    // out data
-    auto data                  = chunk_nested_data.host_ptr(chunk_off);
-    chunks[c].column_data_base = chunk_nested_data.device_ptr(chunk_off);
-
-    chunk_off += max_depth;
-
-    // fill in the arrays on the host.  there are some important considerations to
-    // take into account here for nested columns.  specifically, with structs
-    // there is sharing of output buffers between input columns.  consider this schema
-    //
-    //  required group field_id=1 name {
-    //    required binary field_id=2 firstname (String);
-    //    required binary field_id=3 middlename (String);
-    //    required binary field_id=4 lastname (String);
-    // }
-    //
-    // there are 3 input columns of data here (firstname, middlename, lastname), but
-    // only 1 output column (name).  The structure of the output column buffers looks like
-    // the schema itself
-    //
-    // struct      (name)
-    //     string  (firstname)
-    //     string  (middlename)
-    //     string  (lastname)
-    //
-    // The struct column can contain validity information. the problem is, the decode
-    // step for the input columns will all attempt to decode this validity information
-    // because each one has it's own copy of the repetition/definition levels. but
-    // since this is all happening in parallel it would mean multiple blocks would
-    // be stomping all over the same memory randomly.  to work around this, we set
-    // things up so that only 1 child of any given nesting level fills in the
-    // data (offsets in the case of lists) or validity information for the higher
-    // levels of the hierarchy that are shared.  In this case, it would mean we
-    // would just choose firstname to be the one that decodes the validity for name.
-    //
-    // we do this by only handing out the pointers to the first child we come across.
-    //
-    auto* cols = &_output_columns;
-    for (size_t idx = 0; idx < max_depth; idx++) {
-      auto& out_buf = (*cols)[input_col.nesting[idx]];
-      cols          = &out_buf.children;
-
-      int owning_schema = out_buf.user_data & PARQUET_COLUMN_BUFFER_SCHEMA_MASK;
-      if (owning_schema == 0 || owning_schema == input_col.schema_idx) {
-        valids[idx] = out_buf.null_mask();
-        data[idx]   = out_buf.data();
-        out_buf.user_data |=
-          static_cast<uint32_t>(input_col.schema_idx) & PARQUET_COLUMN_BUFFER_SCHEMA_MASK;
-      } else {
-        valids[idx] = nullptr;
-        data[idx]   = nullptr;
-      }
-    }
-
-    // column_data_base will always point to leaf data, even for nested types.
-    page_count += chunks[c].max_num_pages;
-  }
-
-  chunks.host_to_device(_stream);
-  chunk_nested_valids.host_to_device(_stream);
-  chunk_nested_data.host_to_device(_stream);
-
-  if (total_str_dict_indexes > 0) {
-    gpu::BuildStringDictionaryIndex(chunks.device_ptr(), chunks.size(), _stream);
-  }
-
-  gpu::DecodePageData(pages, chunks, total_rows, min_row, _stream);
-  pages.device_to_host(_stream);
-  page_nesting.device_to_host(_stream);
-  _stream.synchronize();
-
-  // for list columns, add the final offset to every offset buffer.
-  // TODO : make this happen in more efficiently. Maybe use thrust::for_each
-  // on each buffer.  Or potentially do it in PreprocessColumnData
-  // Note : the reason we are doing this here instead of in the decode kernel is
-  // that it is difficult/impossible for a given page to know that it is writing the very
-  // last value that should then be followed by a terminator (because rows can span
-  // page boundaries).
-  for (size_t idx = 0; idx < _input_columns.size(); idx++) {
-    input_column_info const& input_col = _input_columns[idx];
-
-    auto* cols = &_output_columns;
-    for (size_t l_idx = 0; l_idx < input_col.nesting_depth(); l_idx++) {
-      auto& out_buf = (*cols)[input_col.nesting[l_idx]];
-      cols          = &out_buf.children;
-
-      if (out_buf.type.id() != type_id::LIST ||
-          (out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_LIST_TERMINATED)) {
-        continue;
-      }
-      CUDF_EXPECTS(l_idx < input_col.nesting_depth() - 1, "Encountered a leaf list column");
-      auto& child = (*cols)[input_col.nesting[l_idx + 1]];
-
-      // the final offset for a list at level N is the size of it's child
-      int offset = child.type.id() == type_id::LIST ? child.size - 1 : child.size;
-      cudaMemcpyAsync(static_cast<int32_t*>(out_buf.data()) + (out_buf.size - 1),
-                      &offset,
-                      sizeof(offset),
-                      cudaMemcpyHostToDevice,
-                      _stream.value());
-      out_buf.user_data |= PARQUET_COLUMN_BUFFER_FLAG_LIST_TERMINATED;
-    }
-  }
-
-  // update null counts in the final column buffers
-  for (size_t idx = 0; idx < pages.size(); idx++) {
-    gpu::PageInfo* pi = &pages[idx];
-    if (pi->flags & gpu::PAGEINFO_FLAGS_DICTIONARY) { continue; }
-    gpu::ColumnChunkDesc* col          = &chunks[pi->chunk_idx];
-    input_column_info const& input_col = _input_columns[col->src_col_index];
-
-    int index                 = pi->nesting - page_nesting.device_ptr();
-    gpu::PageNestingInfo* pni = &page_nesting[index];
-
-    auto* cols = &_output_columns;
-    for (size_t l_idx = 0; l_idx < input_col.nesting_depth(); l_idx++) {
-      auto& out_buf = (*cols)[input_col.nesting[l_idx]];
-      cols          = &out_buf.children;
-
-      // if I wasn't the one who wrote out the validity bits, skip it
-      if (chunk_nested_valids.host_ptr(chunk_offsets[pi->chunk_idx])[l_idx] == nullptr) {
-        continue;
-      }
-      out_buf.null_count() += pni[l_idx].null_count;
-    }
-  }
-
-  _stream.synchronize();
-}
-
-reader::impl::impl(std::vector<std::unique_ptr<datasource>>&& sources,
-                   parquet_reader_options const& options,
-                   rmm::cuda_stream_view stream,
-                   rmm::mr::device_memory_resource* mr)
-  : _stream(stream), _mr(mr), _sources(std::move(sources))
-{
-  // Open and parse the source dataset metadata
-  _metadata = std::make_unique<aggregate_reader_metadata>(_sources);
-
-  // Override output timestamp resolution if requested
-  if (options.get_timestamp_type().id() != type_id::EMPTY) {
-    _timestamp_type = options.get_timestamp_type();
-  }
-
-  // Strings may be returned as either string or categorical columns
-  _strings_to_categorical = options.is_enabled_convert_strings_to_categories();
-
-  // Binary columns can be read as binary or strings
-  _reader_column_schema = options.get_column_schema();
-
-  // Select only columns required by the options
-  std::tie(_input_columns, _output_columns, _output_column_schemas) =
-    _metadata->select_columns(options.get_columns(),
-                              options.is_enabled_use_pandas_metadata(),
-                              _strings_to_categorical,
-                              _timestamp_type.id());
-}
-
-table_with_metadata reader::impl::read(size_type skip_rows,
-                                       size_type num_rows,
-                                       bool uses_custom_row_bounds,
-                                       std::vector<std::vector<size_type>> const& row_group_list)
-{
-  // Select only row groups required
-  const auto selected_row_groups =
-    _metadata->select_row_groups(row_group_list, skip_rows, num_rows);
-
-  table_metadata out_metadata;
-
-  // output cudf columns as determined by the top level schema
-  std::vector<std::unique_ptr<column>> out_columns;
-  out_columns.reserve(_output_columns.size());
-
-  if (selected_row_groups.size() != 0 && _input_columns.size() != 0) {
-    // Descriptors for all the chunks that make up the selected columns
-    const auto num_input_columns = _input_columns.size();
-    const auto num_chunks        = selected_row_groups.size() * num_input_columns;
-    hostdevice_vector<gpu::ColumnChunkDesc> chunks(0, num_chunks, _stream);
-
-    // Association between each column chunk and its source
-    std::vector<size_type> chunk_source_map(num_chunks);
-
-    // Tracker for eventually deallocating compressed and uncompressed data
-    std::vector<std::unique_ptr<datasource::buffer>> page_data(num_chunks);
-
-    // Keep track of column chunk file offsets
-    std::vector<size_t> column_chunk_offsets(num_chunks);
-
-    // Initialize column chunk information
-    size_t total_decompressed_size = 0;
-    auto remaining_rows            = num_rows;
-    std::vector<std::future<void>> read_rowgroup_tasks;
-    for (const auto& rg : selected_row_groups) {
-      const auto& row_group       = _metadata->get_row_group(rg.index, rg.source_index);
-      auto const row_group_start  = rg.start_row;
-      auto const row_group_source = rg.source_index;
-      auto const row_group_rows   = std::min<int>(remaining_rows, row_group.num_rows);
-      auto const io_chunk_idx     = chunks.size();
-
-      // generate ColumnChunkDesc objects for everything to be decoded (all input columns)
-      for (size_t i = 0; i < num_input_columns; ++i) {
-        auto col = _input_columns[i];
-        // look up metadata
-        auto& col_meta = _metadata->get_column_metadata(rg.index, rg.source_index, col.schema_idx);
-        auto& schema   = _metadata->get_schema(col.schema_idx);
-
-        auto [type_width, clock_rate, converted_type] =
-          conversion_info(to_type_id(schema, _strings_to_categorical, _timestamp_type.id()),
-                          _timestamp_type.id(),
-                          schema.type,
-                          schema.converted_type,
-                          schema.type_length);
-
-        column_chunk_offsets[chunks.size()] =
-          (col_meta.dictionary_page_offset != 0)
-            ? std::min(col_meta.data_page_offset, col_meta.dictionary_page_offset)
-            : col_meta.data_page_offset;
-
-        chunks.push_back(gpu::ColumnChunkDesc(col_meta.total_compressed_size,
-                                              nullptr,
-                                              col_meta.num_values,
-                                              schema.type,
-                                              type_width,
-                                              row_group_start,
-                                              row_group_rows,
-                                              schema.max_definition_level,
-                                              schema.max_repetition_level,
-                                              _metadata->get_output_nesting_depth(col.schema_idx),
-                                              required_bits(schema.max_definition_level),
-                                              required_bits(schema.max_repetition_level),
-                                              col_meta.codec,
-                                              converted_type,
-                                              schema.logical_type,
-                                              schema.decimal_scale,
-                                              clock_rate,
-                                              i,
-                                              col.schema_idx));
-
-        // Map each column chunk to its column index and its source index
-        chunk_source_map[chunks.size() - 1] = row_group_source;
-
-        if (col_meta.codec != Compression::UNCOMPRESSED) {
-          total_decompressed_size += col_meta.total_uncompressed_size;
-        }
-      }
-      // Read compressed chunk data to device memory
-      read_rowgroup_tasks.push_back(read_column_chunks(
-        page_data, chunks, io_chunk_idx, chunks.size(), column_chunk_offsets, chunk_source_map));
-
-      remaining_rows -= row_group.num_rows;
-    }
-    for (auto& task : read_rowgroup_tasks) {
-      task.wait();
-    }
-    assert(remaining_rows <= 0);
-
-    // Process dataset chunk pages into output columns
-    const auto total_pages = count_page_headers(chunks);
-    if (total_pages > 0) {
-      hostdevice_vector<gpu::PageInfo> pages(total_pages, total_pages, _stream);
-      rmm::device_buffer decomp_page_data;
-
-      // decoding of column/page information
-      decode_page_headers(chunks, pages);
-      if (total_decompressed_size > 0) {
-        decomp_page_data = decompress_page_data(chunks, pages);
-        // Free compressed data
-        for (size_t c = 0; c < chunks.size(); c++) {
-          if (chunks[c].codec != parquet::Compression::UNCOMPRESSED) { page_data[c].reset(); }
-        }
-      }
-
-      // build output column info
-      // walk the schema, building out_buffers that mirror what our final cudf columns will look
-      // like. important : there is not necessarily a 1:1 mapping between input columns and output
-      // columns. For example, parquet does not explicitly store a ColumnChunkDesc for struct
-      // columns. The "structiness" is simply implied by the schema.  For example, this schema:
-      //  required group field_id=1 name {
-      //    required binary field_id=2 firstname (String);
-      //    required binary field_id=3 middlename (String);
-      //    required binary field_id=4 lastname (String);
-      // }
-      // will only contain 3 columns of data (firstname, middlename, lastname).  But of course
-      // "name" is a struct column that we want to return, so we have to make sure that we
-      // create it ourselves.
-      // std::vector<output_column_info> output_info = build_output_column_info();
-
-      // nesting information (sizes, etc) stored -per page-
-      // note : even for flat schemas, we allocate 1 level of "nesting" info
-      hostdevice_vector<gpu::PageNestingInfo> page_nesting_info;
-      allocate_nesting_info(chunks, pages, page_nesting_info);
-
-      // - compute column sizes and allocate output buffers.
-      //   important:
-      //   for nested schemas, we have to do some further preprocessing to determine:
-      //    - real column output sizes per level of nesting (in a flat schema, there's only 1 level
-      //    of
-      //      nesting and it's size is the row count)
-      //
-      // - for nested schemas, output buffer offset values per-page, per nesting-level for the
-      // purposes of decoding.
-      preprocess_columns(chunks, pages, skip_rows, num_rows, uses_custom_row_bounds);
-
-      // decoding of column data itself
-      decode_page_data(chunks, pages, page_nesting_info, skip_rows, num_rows);
-
-      // create the final output cudf columns
-      for (size_t i = 0; i < _output_columns.size(); ++i) {
-        column_name_info& col_name = out_metadata.schema_info.emplace_back("");
-        auto const metadata =
-          _reader_column_schema.has_value()
-            ? std::make_optional<reader_column_schema>((*_reader_column_schema)[i])
-            : std::nullopt;
-        out_columns.emplace_back(
-          make_column(_output_columns[i], &col_name, metadata, _stream, _mr));
-      }
-    }
-  }
-
-  // Create empty columns as needed (this can happen if we've ended up with no actual data to read)
-  for (size_t i = out_columns.size(); i < _output_columns.size(); ++i) {
-    column_name_info& col_name = out_metadata.schema_info.emplace_back("");
-    out_columns.emplace_back(io::detail::empty_like(_output_columns[i], &col_name, _stream, _mr));
-  }
-
-  // Return column names (must match order of returned columns)
-  out_metadata.column_names.resize(_output_columns.size());
-  for (size_t i = 0; i < _output_column_schemas.size(); i++) {
-    auto const& schema           = _metadata->get_schema(_output_column_schemas[i]);
-    out_metadata.column_names[i] = schema.name;
-  }
-
-  // Return user metadata
-  out_metadata.per_file_user_data = _metadata->get_key_value_metadata();
-  out_metadata.user_data          = {out_metadata.per_file_user_data[0].begin(),
-                            out_metadata.per_file_user_data[0].end()};
-
-  return {std::make_unique<table>(std::move(out_columns)), std::move(out_metadata)};
-}
-
-// Forward to implementation
-reader::reader(std::vector<std::unique_ptr<cudf::io::datasource>>&& sources,
-               parquet_reader_options const& options,
-               rmm::cuda_stream_view stream,
-               rmm::mr::device_memory_resource* mr)
-  : _impl(std::make_unique<impl>(std::move(sources), options, stream, mr))
-{
-}
-
-// Destructor within this translation unit
-reader::~reader() = default;
-
-// Forward to implementation
-table_with_metadata reader::read(parquet_reader_options const& options)
-{
-  // if the user has specified custom row bounds
-  bool const uses_custom_row_bounds = options.get_num_rows() >= 0 || options.get_skip_rows() != 0;
-  return _impl->read(options.get_skip_rows(),
-                     options.get_num_rows(),
-                     uses_custom_row_bounds,
-                     options.get_row_groups());
-}
-
-}  // namespace parquet
-}  // namespace detail
-}  // namespace io
-}  // namespace cudf
diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp
index 6c3e05b4264..b53487c824b 100644
--- a/cpp/src/io/parquet/reader_impl.hpp
+++ b/cpp/src/io/parquet/reader_impl.hpp
@@ -21,32 +21,23 @@
 
 #pragma once
 
-#include "parquet.hpp"
 #include "parquet_gpu.hpp"
+#include "reader_impl_helpers.hpp"
 
 #include <io/utilities/column_buffer.hpp>
-#include <io/utilities/hostdevice_vector.hpp>
 
 #include <cudf/io/datasource.hpp>
 #include <cudf/io/detail/parquet.hpp>
 #include <cudf/io/parquet.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
+#include <rmm/mr/device/device_memory_resource.hpp>
 
 #include <memory>
-#include <string>
-#include <utility>
+#include <optional>
 #include <vector>
 
-namespace cudf {
-namespace io {
-namespace detail {
-namespace parquet {
-using namespace cudf::io::parquet;
-using namespace cudf::io;
-
-// Forward declarations
-class aggregate_reader_metadata;
+namespace cudf::io::detail::parquet {
 
 /**
  * @brief Implementation for Parquet reader
@@ -71,8 +62,8 @@ class reader::impl {
    *
    * @param skip_rows Number of rows to skip from the start
    * @param num_rows Number of rows to read
-   * @param uses_custom_row_bounds Whether or not num_rows and min_rows represents user-specific
-   * bounds
+   * @param uses_custom_row_bounds Whether or not num_rows and skip_rows represent user-specified
+   *        bounds
    * @param row_group_indices Lists of row groups to read, one per source
    *
    * @return The set of columns along with metadata
@@ -80,111 +71,79 @@ class reader::impl {
   table_with_metadata read(size_type skip_rows,
                            size_type num_rows,
                            bool uses_custom_row_bounds,
-                           std::vector<std::vector<size_type>> const& row_group_indices);
+                           host_span<std::vector<size_type> const> row_group_indices);
 
  private:
   /**
-   * @brief Reads compressed page data to device memory
-   *
-   * @param page_data Buffers to hold compressed page data for each chunk
-   * @param chunks List of column chunk descriptors
-   * @param begin_chunk Index of first column chunk to read
-   * @param end_chunk Index after the last column chunk to read
-   * @param column_chunk_offsets File offset for all chunks
+   * @brief Perform the necessary data preprocessing for parsing the file later on.
    *
+   * @param skip_rows Number of rows to skip from the start
+   * @param num_rows Number of rows to read, or `-1` to read all rows
+   * @param uses_custom_row_bounds Whether or not num_rows and skip_rows represent user-specified
+   *        bounds
+   * @param row_group_indices Lists of row groups to read (one per source), or empty to read all
    */
-  std::future<void> read_column_chunks(std::vector<std::unique_ptr<datasource::buffer>>& page_data,
-                                       hostdevice_vector<gpu::ColumnChunkDesc>& chunks,
-                                       size_t begin_chunk,
-                                       size_t end_chunk,
-                                       const std::vector<size_t>& column_chunk_offsets,
-                                       std::vector<size_type> const& chunk_source_map);
+  void prepare_data(size_type skip_rows,
+                    size_type num_rows,
+                    bool uses_custom_row_bounds,
+                    host_span<std::vector<size_type> const> row_group_indices);
 
   /**
-   * @brief Returns the number of total pages from the given column chunks
-   *
-   * @param chunks List of column chunk descriptors
-   *
-   * @return The total number of pages
+   * @brief Load and decompress the input file(s) into memory.
    */
-  size_t count_page_headers(hostdevice_vector<gpu::ColumnChunkDesc>& chunks);
+  void load_and_decompress_data(std::vector<row_group_info> const& row_groups_info,
+                                size_type num_rows);
 
   /**
-   * @brief Returns the page information from the given column chunks.
+   * @brief Allocate nesting information storage for all pages and set pointers to it.
    *
-   * @param chunks List of column chunk descriptors
-   * @param pages List of page information
+   * One large contiguous buffer of PageNestingInfo structs is allocated and
+   * distributed among the PageInfo structs.
+   *
+   * Note that this gets called even in the flat schema case so that we have a
+   * consistent place to store common information such as value counts, etc.
    */
-  void decode_page_headers(hostdevice_vector<gpu::ColumnChunkDesc>& chunks,
-                           hostdevice_vector<gpu::PageInfo>& pages);
+  void allocate_nesting_info();
 
   /**
-   * @brief Decompresses the page data, at page granularity.
+   * @brief Read a chunk of data and return an output table.
    *
-   * @param chunks List of column chunk descriptors
-   * @param pages List of page information
+   * This function is called internally and expects all preprocessing steps have already been done.
    *
-   * @return Device buffer to decompressed page data
+   * @param uses_custom_row_bounds Whether or not num_rows and skip_rows represent user-specified
+   *        bounds
+   * @return The output table along with columns' metadata
    */
-  rmm::device_buffer decompress_page_data(hostdevice_vector<gpu::ColumnChunkDesc>& chunks,
-                                          hostdevice_vector<gpu::PageInfo>& pages);
+  table_with_metadata read_chunk_internal(bool uses_custom_row_bounds);
 
   /**
-   * @brief Allocate nesting information storage for all pages and set pointers
-   *        to it.
-   *
-   * One large contiguous buffer of PageNestingInfo structs is allocated and
-   * distributed among the PageInfo structs.
-   *
-   * Note that this gets called even in the flat schema case so that we have a
-   * consistent place to store common information such as value counts, etc.
+   * @brief Finalize the output table by adding empty columns for the non-selected columns in
+   * schema.
    *
-   * @param chunks List of column chunk descriptors
-   * @param pages List of page information
-   * @param page_nesting_info The allocated nesting info structs.
+   * @param out_metadata The output table metadata
+   * @param out_columns The columns for building the output table
+   * @return The output table along with columns' metadata
    */
-  void allocate_nesting_info(hostdevice_vector<gpu::ColumnChunkDesc> const& chunks,
-                             hostdevice_vector<gpu::PageInfo>& pages,
-                             hostdevice_vector<gpu::PageNestingInfo>& page_nesting_info);
+  table_with_metadata finalize_output(table_metadata& out_metadata,
+                                      std::vector<std::unique_ptr<column>>& out_columns);
 
   /**
-   * @brief Preprocess column information and allocate output buffers.
-   *
-   * There are several pieces of information we can't compute directly from row counts in
-   * the parquet headers when dealing with nested schemas.
-   * - The total sizes of all output columns at all nesting levels
-   * - The starting output buffer offset for each page, for each nesting level
+   * @brief Allocate data buffers for the output columns.
    *
-   * For flat schemas, these values are computed during header decoding (see gpuDecodePageHeaders)
-   *
-   * @param chunks All chunks to be decoded
-   * @param pages All pages to be decoded
-   * @param min_rows crop all rows below min_row
-   * @param total_rows Maximum number of rows to read
-   * @param uses_custom_row_bounds Whether or not num_rows and min_rows represents user-specific
-   * bounds
-   * a preprocess.
+   * @param skip_rows Crop all rows below skip_rows
+   * @param num_rows Maximum number of rows to read
+   * @param uses_custom_row_bounds Whether or not num_rows and skip_rows represent user-specified
+   *        bounds
    */
-  void preprocess_columns(hostdevice_vector<gpu::ColumnChunkDesc>& chunks,
-                          hostdevice_vector<gpu::PageInfo>& pages,
-                          size_t min_row,
-                          size_t total_rows,
-                          bool uses_custom_row_bounds);
+  void allocate_columns(size_t skip_rows, size_t num_rows, bool uses_custom_row_bounds);
 
   /**
    * @brief Converts the page data and outputs to columns.
    *
-   * @param chunks List of column chunk descriptors
-   * @param pages List of page information
-   * @param page_nesting Page nesting array
-   * @param min_row Minimum number of rows from start
-   * @param total_rows Number of rows to output
+   * @param skip_rows Minimum number of rows from start
+   * @param num_rows Number of rows to output
    */
-  void decode_page_data(hostdevice_vector<gpu::ColumnChunkDesc>& chunks,
-                        hostdevice_vector<gpu::PageInfo>& pages,
-                        hostdevice_vector<gpu::PageNestingInfo>& page_nesting,
-                        size_t min_row,
-                        size_t total_rows);
+  void decode_page_data(size_t skip_rows, size_t num_rows);
 
  private:
   rmm::cuda_stream_view _stream;
@@ -195,17 +154,21 @@ class reader::impl {
 
   // input columns to be processed
   std::vector<input_column_info> _input_columns;
-  // output columns to be generated
-  std::vector<column_buffer> _output_columns;
-  // _output_columns associated schema indices
+
+  // Buffers for generating output columns
+  std::vector<column_buffer> _output_buffers;
+
+  // _output_buffers associated schema indices
   std::vector<int> _output_column_schemas;
 
   bool _strings_to_categorical = false;
   std::optional<std::vector<reader_column_schema>> _reader_column_schema;
   data_type _timestamp_type{type_id::EMPTY};
+
+  cudf::io::parquet::gpu::file_intermediate_data _file_itm_data;
+
+  size_type _skip_rows{0};
+  size_type _num_rows{0};
 };
 
-}  // namespace parquet
-}  // namespace detail
-}  // namespace io
-}  // namespace cudf
+}  // namespace cudf::io::detail::parquet
diff --git a/cpp/src/io/parquet/reader_impl_helpers.cpp b/cpp/src/io/parquet/reader_impl_helpers.cpp
new file mode 100644
index 00000000000..7090df2cae0
--- /dev/null
+++ b/cpp/src/io/parquet/reader_impl_helpers.cpp
@@ -0,0 +1,629 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "reader_impl_helpers.hpp"
+
+#include <numeric>
+#include <regex>
+
+namespace cudf::io::detail::parquet {
+
+namespace {
+// Maps a Parquet LogicalType annotation to the equivalent legacy ConvertedType.
+ConvertedType logical_type_to_converted_type(LogicalType const& logical)
+{
+  if (logical.isset.STRING) {
+    return parquet::UTF8;
+  } else if (logical.isset.MAP) {
+    return parquet::MAP;
+  } else if (logical.isset.LIST) {
+    return parquet::LIST;
+  } else if (logical.isset.ENUM) {
+    return parquet::ENUM;
+  } else if (logical.isset.DECIMAL) {
+    return parquet::DECIMAL;  // TODO set decimal values
+  } else if (logical.isset.DATE) {
+    return parquet::DATE;
+  } else if (logical.isset.TIME) {  // units other than MILLIS/MICROS fall through to UNKNOWN
+    if (logical.TIME.unit.isset.MILLIS)
+      return parquet::TIME_MILLIS;
+    else if (logical.TIME.unit.isset.MICROS)
+      return parquet::TIME_MICROS;
+  } else if (logical.isset.TIMESTAMP) {  // units other than MILLIS/MICROS fall through to UNKNOWN
+    if (logical.TIMESTAMP.unit.isset.MILLIS)
+      return parquet::TIMESTAMP_MILLIS;
+    else if (logical.TIMESTAMP.unit.isset.MICROS)
+      return parquet::TIMESTAMP_MICROS;
+  } else if (logical.isset.INTEGER) {
+    switch (logical.INTEGER.bitWidth) {
+      case 8: return logical.INTEGER.isSigned ? INT_8 : UINT_8;
+      case 16: return logical.INTEGER.isSigned ? INT_16 : UINT_16;
+      case 32: return logical.INTEGER.isSigned ? INT_32 : UINT_32;
+      case 64: return logical.INTEGER.isSigned ? INT_64 : UINT_64;
+      default: break;  // any other bit width falls through to UNKNOWN
+    }
+  } else if (logical.isset.UNKNOWN) {
+    return parquet::NA;
+  } else if (logical.isset.JSON) {
+    return parquet::JSON;
+  } else if (logical.isset.BSON) {
+    return parquet::BSON;
+  }
+  return parquet::UNKNOWN;  // no logical type set, or one without a converted-type mapping above
+}
+
+}  // namespace
+
+/**
+ * @brief Function that translates Parquet datatype to cuDF type enum
+ */
+type_id to_type_id(SchemaElement const& schema,
+                   bool strings_to_categorical,
+                   type_id timestamp_type_id)
+{
+  parquet::Type const physical            = schema.type;
+  parquet::LogicalType const logical_type = schema.logical_type;
+  parquet::ConvertedType converted_type   = schema.converted_type;
+  int32_t decimal_scale                   = schema.decimal_scale;
+
+  // Logical type used for actual data interpretation; the legacy converted type
+  // is superseded by 'logical' type whenever available.
+  auto const inferred_converted_type = logical_type_to_converted_type(logical_type);
+  if (inferred_converted_type != parquet::UNKNOWN) converted_type = inferred_converted_type;
+  if (inferred_converted_type == parquet::DECIMAL && decimal_scale == 0)
+    decimal_scale = schema.logical_type.DECIMAL.scale;  // NOTE(review): not read after this
+
+  switch (converted_type) {
+    case parquet::UINT_8: return type_id::UINT8;
+    case parquet::INT_8: return type_id::INT8;
+    case parquet::UINT_16: return type_id::UINT16;
+    case parquet::INT_16: return type_id::INT16;
+    case parquet::UINT_32: return type_id::UINT32;
+    case parquet::UINT_64: return type_id::UINT64;
+    case parquet::DATE: return type_id::TIMESTAMP_DAYS;
+    case parquet::TIME_MILLIS: return type_id::DURATION_MILLISECONDS;
+    case parquet::TIME_MICROS: return type_id::DURATION_MICROSECONDS;
+    case parquet::TIMESTAMP_MILLIS:
+      return (timestamp_type_id != type_id::EMPTY) ? timestamp_type_id
+                                                   : type_id::TIMESTAMP_MILLISECONDS;
+    case parquet::TIMESTAMP_MICROS:
+      return (timestamp_type_id != type_id::EMPTY) ? timestamp_type_id
+                                                   : type_id::TIMESTAMP_MICROSECONDS;
+    case parquet::DECIMAL:
+      if (physical == parquet::INT32) { return type_id::DECIMAL32; }
+      if (physical == parquet::INT64) { return type_id::DECIMAL64; }
+      if (physical == parquet::FIXED_LEN_BYTE_ARRAY) {
+        if (schema.type_length <= static_cast<int32_t>(sizeof(int32_t))) {
+          return type_id::DECIMAL32;
+        }
+        if (schema.type_length <= static_cast<int32_t>(sizeof(int64_t))) {
+          return type_id::DECIMAL64;
+        }
+        if (schema.type_length <= static_cast<int32_t>(sizeof(__int128_t))) {
+          return type_id::DECIMAL128;
+        }
+      }
+      CUDF_FAIL("Invalid representation of decimal type");
+      break;
+
+    // maps are just List<Struct<>>.
+    case parquet::MAP:
+    case parquet::LIST: return type_id::LIST;
+    case parquet::NA: return type_id::STRING;
+    // return type_id::EMPTY; //TODO(kn): enable after Null/Empty column support
+    default: break;
+  }
+  // Nanosecond units are not mapped by logical_type_to_converted_type, so test them explicitly.
+  if (inferred_converted_type == parquet::UNKNOWN and physical == parquet::INT64 and
+      logical_type.TIMESTAMP.unit.isset.NANOS) {
+    return (timestamp_type_id != type_id::EMPTY) ? timestamp_type_id
+                                                 : type_id::TIMESTAMP_NANOSECONDS;
+  }
+
+  if (inferred_converted_type == parquet::UNKNOWN and physical == parquet::INT64 and
+      logical_type.TIME.unit.isset.NANOS) {
+    return type_id::DURATION_NANOSECONDS;
+  }
+
+  // is it simply a struct?
+  if (schema.is_struct()) { return type_id::STRUCT; }
+
+  // Physical storage type supported by Parquet; controls the on-disk storage
+  // format in combination with the encoding type.
+  switch (physical) {
+    case parquet::BOOLEAN: return type_id::BOOL8;
+    case parquet::INT32: return type_id::INT32;
+    case parquet::INT64: return type_id::INT64;
+    case parquet::FLOAT: return type_id::FLOAT32;
+    case parquet::DOUBLE: return type_id::FLOAT64;
+    case parquet::BYTE_ARRAY:
+    case parquet::FIXED_LEN_BYTE_ARRAY:
+      // Can be mapped to INT32 (32-bit hash) or STRING
+      return strings_to_categorical ? type_id::INT32 : type_id::STRING;
+    case parquet::INT96:
+      return (timestamp_type_id != type_id::EMPTY) ? timestamp_type_id
+                                                   : type_id::TIMESTAMP_NANOSECONDS;
+    default: break;
+  }
+
+  return type_id::EMPTY;  // nothing matched: neither converted, logical, struct nor physical type
+}
+// Parses the Parquet footer of `source` into this FileMetaData after validating size and magic.
+metadata::metadata(datasource* source)
+{
+  constexpr auto header_len = sizeof(file_header_s);
+  constexpr auto ender_len  = sizeof(file_ender_s);
+  // Check the size before any read: `len - ender_len` is unsigned and wraps for tiny sources.
+  const auto len = source->size();
+  CUDF_EXPECTS(len > header_len + ender_len, "Incorrect data source");
+  const auto header_buffer = source->host_read(0, header_len);
+  const auto header        = reinterpret_cast<const file_header_s*>(header_buffer->data());
+  const auto ender_buffer  = source->host_read(len - ender_len, ender_len);
+  const auto ender         = reinterpret_cast<const file_ender_s*>(ender_buffer->data());
+  CUDF_EXPECTS(header->magic == parquet_magic && ender->magic == parquet_magic,
+               "Corrupted header or footer");
+  CUDF_EXPECTS(ender->footer_len != 0 && ender->footer_len <= (len - header_len - ender_len),
+               "Incorrect footer length");
+
+  const auto buffer = source->host_read(len - ender->footer_len - ender_len, ender->footer_len);
+  CompactProtocolReader cp(buffer->data(), ender->footer_len);
+  CUDF_EXPECTS(cp.read(this), "Cannot parse metadata");
+  CUDF_EXPECTS(cp.InitSchema(this), "Cannot initialize schema");
+}
+
+std::vector<metadata> aggregate_reader_metadata::metadatas_from_sources(
+  std::vector<std::unique_ptr<datasource>> const& sources)
+{
+  // Parse one metadata object per source, keeping the order of `sources`.
+  std::vector<metadata> parsed;
+  for (auto const& src : sources) {
+    parsed.emplace_back(src.get());
+  }
+  return parsed;
+}
+
+std::vector<std::unordered_map<std::string, std::string>>
+aggregate_reader_metadata::collect_keyval_metadata() const
+{
+  // Result holds one map per source file, ordered like per_file_metadata.
+  std::vector<std::unordered_map<std::string, std::string>> maps_per_file;
+
+  for (auto const& file_meta : per_file_metadata) {
+    // Start with an empty map for this file and fill it entry by entry.
+    auto& file_map = maps_per_file.emplace_back();
+
+    for (auto const& entry : file_meta.key_value_metadata) {
+      // insert() never overwrites, so the first value seen for a repeated key is kept.
+      auto const hint = file_map.end();
+      file_map.insert(hint, std::pair{entry.key, entry.value});
+    }
+  }
+
+  // Every file contributes exactly one map, even when it has no key/value entries.
+  return maps_per_file;
+}
+// Sums num_rows over all sources; the accumulator is `int` because the seed is the literal 0.
+size_type aggregate_reader_metadata::calc_num_rows() const
+{
+  return std::accumulate(
+    per_file_metadata.begin(), per_file_metadata.end(), 0, [](auto const& sum, auto const& pfm) {
+      return sum + pfm.num_rows;  // const refs: also valid when accumulate passes an rvalue
+    });
+}
+// Sums the row group count over all sources; accumulates in `int` (seed is the literal 0).
+size_type aggregate_reader_metadata::calc_num_row_groups() const
+{
+  return std::accumulate(
+    per_file_metadata.begin(), per_file_metadata.end(), 0, [](auto const& sum, auto const& pfm) {
+      return sum + pfm.row_groups.size();  // const refs: also valid for an rvalue accumulator
+    });
+}
+// Parses all sources; the other members are then computed from per_file_metadata.
+aggregate_reader_metadata::aggregate_reader_metadata(
+  std::vector<std::unique_ptr<datasource>> const& sources)
+  : per_file_metadata(metadatas_from_sources(sources)),
+    keyval_maps(collect_keyval_metadata()),
+    num_rows(calc_num_rows()),
+    num_row_groups(calc_num_row_groups())
+{
+  if (per_file_metadata.size() > 0) {
+    auto const& first_meta = per_file_metadata.front();
+    auto const num_cols =
+      first_meta.row_groups.size() > 0 ? first_meta.row_groups.front().columns.size() : 0;
+    auto const& schema = first_meta.schema;
+    // NOTE(review): num_cols is 0 if the first file has no row groups; later files are held to it.
+    // Verify that the input files have matching numbers of columns and schema.
+    for (auto const& pfm : per_file_metadata) {
+      if (pfm.row_groups.size() > 0) {  // only the first row group of each file is compared
+        CUDF_EXPECTS(num_cols == pfm.row_groups.front().columns.size(),
+                     "All sources must have the same number of columns");
+      }
+      CUDF_EXPECTS(schema == pfm.schema, "All sources must have the same schema");
+    }
+  }
+}
+// Returns row group `row_group_index` of source `src_idx`; only `src_idx` is range-checked.
+RowGroup const& aggregate_reader_metadata::get_row_group(size_type row_group_index,
+                                                         size_type src_idx) const
+{
+  CUDF_EXPECTS(src_idx >= 0 && src_idx < static_cast<size_type>(per_file_metadata.size()),
+               "invalid source index");
+  return per_file_metadata[src_idx].row_groups[row_group_index];
+}
+// Finds the metadata of the column chunk tied to `schema_idx` in the given source and row group.
+ColumnChunkMetaData const& aggregate_reader_metadata::get_column_metadata(size_type row_group_index,
+                                                                          size_type src_idx,
+                                                                          int schema_idx) const
+{
+  auto const& chunks = per_file_metadata[src_idx].row_groups[row_group_index].columns;
+  auto const match =
+    std::find_if(chunks.begin(), chunks.end(), [schema_idx](ColumnChunk const& chunk) {
+      return chunk.schema_idx == schema_idx;
+    });
+  CUDF_EXPECTS(match != chunks.end(), "Found no metadata for schema index");
+  return match->meta_data;
+}
+
+std::string aggregate_reader_metadata::get_pandas_index() const
+{
+  // Only the first file is searched: all inputs are assumed to share metadata (TODO: verify).
+  if (keyval_maps.empty()) { return ""; }  // no sources, hence no pandas metadata to search
+  auto it = keyval_maps[0].find("pandas");
+  if (it != keyval_maps[0].end()) {
+    // Captures a list of quoted strings found inside square brackets after `"index_columns":`
+    // Inside quotes supports newlines, brackets, escaped quotes, etc.
+    // One-liner regex:
+    // "index_columns"\s*:\s*\[\s*((?:"(?:|(?:.*?(?![^\\]")).?)[^\\]?",?\s*)*)\]
+    // Documented below.
+    std::regex index_columns_expr{
+      R"("index_columns"\s*:\s*\[\s*)"  // match preamble, opening square bracket, whitespace
+      R"(()"                            // Open first capturing group
+      R"((?:")"                         // Open non-capturing group match opening quote
+      R"((?:|(?:.*?(?![^\\]")).?))"     // match empty string or anything between quotes
+      R"([^\\]?")"                      // Match closing non-escaped quote
+      R"(,?\s*)"                        // Match optional comma and whitespace
+      R"()*)"                           // Close non-capturing group and repeat 0 or more times
+      R"())"                            // Close first capturing group
+      R"(\])"                           // Match closing square brackets
+    };
+    std::smatch sm;
+    if (std::regex_search(it->second, sm, index_columns_expr)) { return sm[1].str(); }
+  }
+  return "";
+}
+// Returns the unique, unescaped pandas index column names parsed from get_pandas_index().
+std::vector<std::string> aggregate_reader_metadata::get_pandas_index_names() const
+{
+  std::vector<std::string> names;
+  auto str = get_pandas_index();
+  if (str.length() != 0) {
+    std::regex index_name_expr{R"(\"((?:\\.|[^\"])*)\")"};
+    std::regex esc_quote{R"(\\")"};  // built once instead of once per match
+    std::smatch sm;
+    while (std::regex_search(str, sm, index_name_expr)) {
+      if (sm.size() == 2) {  // 2 = whole match, first item
+        // Unescape before the duplicate check so it compares against what `names` stores.
+        auto name = std::regex_replace(sm[1].str(), esc_quote, R"(")");
+        if (std::find(names.begin(), names.end(), name) == names.end()) { names.push_back(name); }
+      }
+      str = sm.suffix();
+    }
+  }
+  return names;
+}
+
+std::tuple<size_type, size_type, std::vector<row_group_info>>
+aggregate_reader_metadata::select_row_groups(
+  host_span<std::vector<size_type> const> row_group_indices,
+  size_type row_start,
+  size_type row_count) const
+{
+  std::vector<row_group_info> selection;
+  // Explicit row groups: take exactly the listed ones; row_count becomes their total row count.
+  if (!row_group_indices.empty()) {
+    CUDF_EXPECTS(row_group_indices.size() == per_file_metadata.size(),
+                 "Must specify row groups for each source");
+
+    row_count = 0;
+    for (size_t src_idx = 0; src_idx < row_group_indices.size(); ++src_idx) {
+      for (auto const& rowgroup_idx : row_group_indices[src_idx]) {
+        CUDF_EXPECTS(
+          rowgroup_idx >= 0 &&
+            rowgroup_idx < static_cast<size_type>(per_file_metadata[src_idx].row_groups.size()),
+          "Invalid rowgroup index");
+        selection.emplace_back(rowgroup_idx, row_count, src_idx);
+        row_count += get_row_group(rowgroup_idx, src_idx).num_rows;
+      }
+    }
+
+    return {row_start, row_count, std::move(selection)};
+  }
+  // Row range: clamp the request to the rows available, then pick the overlapping row groups.
+  row_start = std::max(row_start, 0);
+  if (row_count < 0) {
+    row_count = std::min(get_num_rows(), std::numeric_limits<size_type>::max());
+  }
+  row_count = std::min(row_count, get_num_rows() - row_start);
+  CUDF_EXPECTS(row_count >= 0, "Invalid row count");
+  CUDF_EXPECTS(row_start <= get_num_rows(), "Invalid row start");
+  // Stop at the first row group reaching the range end; a `break` would resume in the next source.
+  size_type count = 0;
+  for (size_t src_idx = 0; src_idx < per_file_metadata.size(); ++src_idx) {
+    for (size_t rg_idx = 0; rg_idx < per_file_metadata[src_idx].row_groups.size(); ++rg_idx) {
+      auto const chunk_start_row = count;
+      count += get_row_group(rg_idx, src_idx).num_rows;
+      if (count > row_start || count == 0) {
+        selection.emplace_back(rg_idx, chunk_start_row, src_idx);
+      }
+      if (count >= row_start + row_count) { return {row_start, row_count, std::move(selection)}; }
+    }
+  }
+
+  return {row_start, row_count, std::move(selection)};
+}
+// Resolves the column selection into input columns, output buffers and top-level schema indices.
+std::tuple<std::vector<input_column_info>, std::vector<column_buffer>, std::vector<size_type>>
+aggregate_reader_metadata::select_columns(std::optional<std::vector<std::string>> const& use_names,
+                                          bool include_index,
+                                          bool strings_to_categorical,
+                                          type_id timestamp_type_id) const
+{
+  auto find_schema_child = [&](SchemaElement const& schema_elem, std::string const& name) {
+    auto const& col_schema_idx =
+      std::find_if(schema_elem.children_idx.cbegin(),
+                   schema_elem.children_idx.cend(),
+                   [&](size_t col_schema_idx) { return get_schema(col_schema_idx).name == name; });
+
+    return (col_schema_idx != schema_elem.children_idx.end())
+             ? static_cast<size_type>(*col_schema_idx)
+             : -1;
+  };
+
+  std::vector<column_buffer> output_columns;
+  std::vector<input_column_info> input_columns;
+  std::vector<int> nesting;
+
+  // Return true if column path is valid. e.g. if the path is {"struct1", "child1"}, then it is
+  // valid if "struct1.child1" exists in this file's schema. If "struct1" exists but "child1" is
+  // not a child of "struct1" then the function will return false for "struct1"
+  std::function<bool(column_name_info const*, int, std::vector<column_buffer>&, bool)>
+    build_column = [&](column_name_info const* col_name_info,
+                       int schema_idx,
+                       std::vector<column_buffer>& out_col_array,
+                       bool has_list_parent) {
+      if (schema_idx < 0) { return false; }
+      auto const& schema_elem = get_schema(schema_idx);
+
+      // if schema_elem is a stub then it does not exist in the column_name_info and column_buffer
+      // hierarchy. So continue on. A selected path may end at the stub, leaving no child name.
+      if (schema_elem.is_stub()) {
+        CUDF_EXPECTS(schema_elem.num_children == 1, "Unexpected number of children for stub");
+        auto const has_child_name = col_name_info and not col_name_info->children.empty();
+        auto child_col_name_info  = has_child_name ? &col_name_info->children[0] : nullptr;
+        return build_column(
+          child_col_name_info, schema_elem.children_idx[0], out_col_array, has_list_parent);
+      }
+
+      // if we're at the root, this is a new output column
+      auto const col_type = schema_elem.is_one_level_list()
+                              ? type_id::LIST
+                              : to_type_id(schema_elem, strings_to_categorical, timestamp_type_id);
+      auto const dtype    = to_data_type(col_type, schema_elem);
+
+      column_buffer output_col(dtype, schema_elem.repetition_type == OPTIONAL);
+      if (has_list_parent) { output_col.user_data |= PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT; }
+      // store the index of this element if inserted in out_col_array
+      nesting.push_back(static_cast<int>(out_col_array.size()));
+      output_col.name = schema_elem.name;
+
+      // build each child
+      bool path_is_valid = false;
+      if (col_name_info == nullptr or col_name_info->children.empty()) {
+        // add all children of schema_elem.
+        // At this point, we can no longer pass a col_name_info to build_column
+        for (int idx = 0; idx < schema_elem.num_children; idx++) {
+          path_is_valid |= build_column(nullptr,
+                                        schema_elem.children_idx[idx],
+                                        output_col.children,
+                                        has_list_parent || col_type == type_id::LIST);
+        }
+      } else {
+        for (size_t idx = 0; idx < col_name_info->children.size(); idx++) {
+          path_is_valid |=
+            build_column(&col_name_info->children[idx],
+                         find_schema_child(schema_elem, col_name_info->children[idx].name),
+                         output_col.children,
+                         has_list_parent || col_type == type_id::LIST);
+        }
+      }
+
+      // if I have no children, we're at a leaf and I'm an input column (that is, one with actual
+      // data stored) so add me to the list.
+      if (schema_elem.num_children == 0) {
+        input_column_info& input_col = input_columns.emplace_back(
+          input_column_info{schema_idx, schema_elem.name, schema_elem.max_repetition_level > 0});
+
+        // set up child output column for one-level encoding list
+        if (schema_elem.is_one_level_list()) {
+          // determine the element data type
+          auto const element_type =
+            to_type_id(schema_elem, strings_to_categorical, timestamp_type_id);
+          auto const element_dtype = to_data_type(element_type, schema_elem);
+
+          column_buffer element_col(element_dtype, schema_elem.repetition_type == OPTIONAL);
+          if (has_list_parent || col_type == type_id::LIST) {
+            element_col.user_data |= PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT;
+          }
+          // store the index of this element
+          nesting.push_back(static_cast<int>(output_col.children.size()));
+          // TODO: not sure if we should assign a name or leave it blank
+          element_col.name = "element";
+
+          output_col.children.push_back(std::move(element_col));
+        }
+
+        std::copy(nesting.cbegin(), nesting.cend(), std::back_inserter(input_col.nesting));
+
+        // pop off the extra nesting element.
+        if (schema_elem.is_one_level_list()) { nesting.pop_back(); }
+
+        path_is_valid = true;  // If we're able to reach leaf then path is valid
+      }
+
+      if (path_is_valid) { out_col_array.push_back(std::move(output_col)); }
+
+      nesting.pop_back();
+      return path_is_valid;
+    };
+
+  std::vector<int> output_column_schemas;
+
+  //
+  // there is not necessarily a 1:1 mapping between input columns and output columns.
+  // For example, parquet does not explicitly store a ColumnChunkDesc for struct columns.
+  // The "structiness" is simply implied by the schema.  For example, this schema:
+  //  required group field_id=1 name {
+  //    required binary field_id=2 firstname (String);
+  //    required binary field_id=3 middlename (String);
+  //    required binary field_id=4 lastname (String);
+  // }
+  // will only contain 3 internal columns of data (firstname, middlename, lastname).  But of
+  // course "name" is ultimately the struct column we want to return.
+  //
+  // "firstname", "middlename" and "lastname" represent the input columns in the file that we
+  // process to produce the final cudf "name" column.
+  //
+  // A user can ask for a single field out of the struct e.g. firstname.
+  // In this case they'll pass a fully qualified name to the schema element like
+  // ["name", "firstname"]
+  //
+  auto const& root = get_schema(0);
+  if (not use_names.has_value()) {
+    for (auto const& schema_idx : root.children_idx) {
+      build_column(nullptr, schema_idx, output_columns, false);
+      output_column_schemas.push_back(schema_idx);
+    }
+  } else {
+    struct path_info {
+      std::string full_path;
+      int schema_idx;
+    };
+
+    // Convert schema into a vector of every possible path
+    std::vector<path_info> all_paths;
+    std::function<void(std::string, int)> add_path = [&](std::string path_till_now,
+                                                         int schema_idx) {
+      auto const& schema_elem = get_schema(schema_idx);
+      std::string curr_path   = path_till_now + schema_elem.name;
+      all_paths.push_back({curr_path, schema_idx});
+      for (auto const& child_idx : schema_elem.children_idx) {
+        add_path(curr_path + ".", child_idx);
+      }
+    };
+    for (auto const& child_idx : get_schema(0).children_idx) {
+      add_path("", child_idx);
+    }
+
+    // Find which of the selected paths are valid and get their schema index
+    std::vector<path_info> valid_selected_paths;
+    for (auto const& selected_path : *use_names) {
+      auto found_path =
+        std::find_if(all_paths.begin(), all_paths.end(), [&](path_info& valid_path) {
+          return valid_path.full_path == selected_path;
+        });
+      if (found_path != all_paths.end()) {
+        valid_selected_paths.push_back({selected_path, found_path->schema_idx});
+      }
+    }
+
+    // Now construct paths as vector of strings for further consumption
+    std::vector<std::vector<std::string>> use_names3;
+    std::transform(valid_selected_paths.begin(),
+                   valid_selected_paths.end(),
+                   std::back_inserter(use_names3),
+                   [&](path_info const& valid_path) {
+                     auto schema_idx = valid_path.schema_idx;
+                     std::vector<std::string> result_path;
+                     do {
+                       SchemaElement const& elem = get_schema(schema_idx);
+                       result_path.push_back(elem.name);
+                       schema_idx = elem.parent_idx;
+                     } while (schema_idx > 0);
+                     return std::vector<std::string>(result_path.rbegin(), result_path.rend());
+                   });
+
+    std::vector<column_name_info> selected_columns;
+    if (include_index) {
+      std::vector<std::string> index_names = get_pandas_index_names();
+      std::transform(index_names.cbegin(),
+                     index_names.cend(),
+                     std::back_inserter(selected_columns),
+                     [](std::string const& name) { return column_name_info(name); });
+    }
+    // Merge the vector use_names into a set of hierarchical column_name_info objects
+    /* This is because if we have columns like this:
+     *     col1
+     *      / \
+     *    s3   f4
+     *   / \
+     * f5   f6
+     *
+     * there may be common paths in use_names like:
+     * {"col1", "s3", "f5"}, {"col1", "f4"}
+     * which means we want the output to contain
+     *     col1
+     *      / \
+     *    s3   f4
+     *   /
+     * f5
+     *
+     * rather than
+     *  col1   col1
+     *   |      |
+     *   s3     f4
+     *   |
+     *   f5
+     */
+    for (auto const& path : use_names3) {
+      auto array_to_find_in = &selected_columns;
+      for (size_t depth = 0; depth < path.size(); ++depth) {
+        // Check if the path exists in our selected_columns and if not, add it.
+        auto const& name_to_find = path[depth];
+        auto found_col           = std::find_if(
+          array_to_find_in->begin(),
+          array_to_find_in->end(),
+          [&name_to_find](column_name_info const& col) { return col.name == name_to_find; });
+        if (found_col == array_to_find_in->end()) {
+          auto& col        = array_to_find_in->emplace_back(name_to_find);
+          array_to_find_in = &col.children;
+        } else {
+          // Path exists. go down further.
+          array_to_find_in = &found_col->children;
+        }
+      }
+    }
+    for (auto& col : selected_columns) {
+      auto const& top_level_col_schema_idx = find_schema_child(root, col.name);
+      bool valid_column = build_column(&col, top_level_col_schema_idx, output_columns, false);
+      if (valid_column) output_column_schemas.push_back(top_level_col_schema_idx);
+    }
+  }
+
+  return std::make_tuple(
+    std::move(input_columns), std::move(output_columns), std::move(output_column_schemas));
+}
+
+}  // namespace cudf::io::detail::parquet
diff --git a/cpp/src/io/parquet/reader_impl_helpers.hpp b/cpp/src/io/parquet/reader_impl_helpers.hpp
new file mode 100644
index 00000000000..6fa86a77e46
--- /dev/null
+++ b/cpp/src/io/parquet/reader_impl_helpers.hpp
@@ -0,0 +1,197 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#pragma once
+
+#include "compact_protocol_reader.hpp"
+#include "parquet_gpu.hpp"
+
+#include <cudf/fixed_point/fixed_point.hpp>
+#include <cudf/io/datasource.hpp>
+#include <cudf/types.hpp>
+
+#include <tuple>
+#include <vector>
+
+namespace cudf::io::detail::parquet {
+
+using namespace cudf::io::parquet;
+
+/**
+ * @brief Function that translates Parquet datatype to cuDF type enum
+ */
+[[nodiscard]] type_id to_type_id(SchemaElement const& schema,
+                                 bool strings_to_categorical,
+                                 type_id timestamp_type_id);
+
+/**
+ * @brief Converts cuDF type enum to column logical type
+ */
+[[nodiscard]] inline data_type to_data_type(type_id t_id, SchemaElement const& schema)
+{
+  return t_id == type_id::DECIMAL32 || t_id == type_id::DECIMAL64 || t_id == type_id::DECIMAL128
+           ? data_type{t_id, numeric::scale_type{-schema.decimal_scale}}  // schema scale, negated
+           : data_type{t_id};  // every non-decimal type carries no scale
+}
+
+/**
+ * @brief Identifies one selected row group: its index, its starting row and its source index
+ */
+struct row_group_info {
+  size_type const index;  // row group index within its source
+  size_t const start_row;  // TODO source index
+  size_type const source_index;  // index of the source that holds this row group
+  row_group_info(size_type index, size_t start_row, size_type source_index)
+    : index(index), start_row(start_row), source_index(source_index)
+  {
+  }
+};
+
+/**
+ * @brief Parquet file metadata (a FileMetaData) parsed from a single datasource
+ */
+struct metadata : public FileMetaData {
+  explicit metadata(datasource* source);  // reads and parses the metadata held by `source`
+};
+// Holds the metadata of every source plus the row and row-group totals computed from them.
+class aggregate_reader_metadata {
+  std::vector<metadata> per_file_metadata;
+  std::vector<std::unordered_map<std::string, std::string>> keyval_maps;
+  size_type num_rows;
+  size_type num_row_groups;
+
+  /**
+   * @brief Create a metadata object from each element in the source vector
+   */
+  static std::vector<metadata> metadatas_from_sources(
+    std::vector<std::unique_ptr<datasource>> const& sources);
+
+  /**
+   * @brief Collect the keyvalue maps from each per-file metadata object into a vector of maps.
+   */
+  [[nodiscard]] std::vector<std::unordered_map<std::string, std::string>> collect_keyval_metadata()
+    const;
+
+  /**
+   * @brief Sums up the number of rows of each source
+   */
+  [[nodiscard]] size_type calc_num_rows() const;
+
+  /**
+   * @brief Sums up the number of row groups of each source
+   */
+  [[nodiscard]] size_type calc_num_row_groups() const;
+
+ public:
+  aggregate_reader_metadata(std::vector<std::unique_ptr<datasource>> const& sources);
+
+  [[nodiscard]] RowGroup const& get_row_group(size_type row_group_index, size_type src_idx) const;
+
+  [[nodiscard]] ColumnChunkMetaData const& get_column_metadata(size_type row_group_index,
+                                                               size_type src_idx,
+                                                               int schema_idx) const;
+
+  [[nodiscard]] auto get_num_rows() const { return num_rows; }
+
+  [[nodiscard]] auto get_num_row_groups() const { return num_row_groups; }
+
+  [[nodiscard]] auto const& get_schema(int schema_idx) const
+  {
+    return per_file_metadata[0].schema[schema_idx];  // schema is always taken from the first file
+  }
+
+  [[nodiscard]] auto const& get_key_value_metadata() const { return keyval_maps; }
+
+  /**
+   * @brief Gets the concrete nesting depth of output cudf columns
+   *
+   * @param schema_index Schema index of the input column
+   *
+   * @return Nesting depth counted while walking from `schema_index` up to the schema root
+   */
+  [[nodiscard]] inline int get_output_nesting_depth(int schema_index) const
+  {
+    auto& pfm = per_file_metadata[0];
+    int depth = 0;
+
+    // walk upwards, skipping repeated fields
+    while (schema_index > 0) {
+      if (!pfm.schema[schema_index].is_stub()) { depth++; }
+      // schema of one-level encoding list doesn't contain nesting information, so we need to
+      // manually add an extra nesting level
+      if (pfm.schema[schema_index].is_one_level_list()) { depth++; }
+      schema_index = pfm.schema[schema_index].parent_idx;
+    }
+    return depth;
+  }
+
+  /**
+   * @brief Extracts the pandas "index_columns" section
+   *
+   * pandas adds its own metadata to the key_value section when writing out the
+   * dataframe to a file to aid in exact reconstruction. The JSON-formatted
+   * metadata contains the index column(s) and pandas-specific datatypes.
+   *
+   * @return comma-separated index column names in quotes
+   */
+  [[nodiscard]] std::string get_pandas_index() const;
+
+  /**
+   * @brief Extracts the column name(s) used for the row indexes in a dataframe
+   *
+   * @return List of the index column name(s) found in the pandas metadata
+   */
+  [[nodiscard]] std::vector<std::string> get_pandas_index_names() const;
+
+  /**
+   * @brief Filters and reduces down to a selection of row groups
+   *
+   * The input `row_start` and `row_count` parameters will be recomputed and output as the valid
+   * values based on the input row group list.
+   *
+   * @param row_group_indices Lists of row groups to read, one per source
+   * @param row_start Starting row of the selection
+   * @param row_count Total number of rows selected
+   *
+   * @return A tuple of corrected row_start, row_count and list of row group indexes and its
+   *         starting row
+   */
+  [[nodiscard]] std::tuple<size_type, size_type, std::vector<row_group_info>> select_row_groups(
+    host_span<std::vector<size_type> const> row_group_indices,
+    size_type row_start,
+    size_type row_count) const;
+
+  /**
+   * @brief Filters and reduces down to a selection of columns
+   *
+   * @param use_names List of paths of column names to select; `nullopt` if user did not select
+   * columns to read
+   * @param include_index Whether to always include the PANDAS index column(s)
+   * @param strings_to_categorical Type conversion parameter
+   * @param timestamp_type_id Type conversion parameter
+   *
+   * @return input column information, output column information, list of output column schema
+   * indices
+   */
+  [[nodiscard]] std::
+    tuple<std::vector<input_column_info>, std::vector<column_buffer>, std::vector<size_type>>
+    select_columns(std::optional<std::vector<std::string>> const& use_names,
+                   bool include_index,
+                   bool strings_to_categorical,
+                   type_id timestamp_type_id) const;
+};
+
+}  // namespace cudf::io::detail::parquet
diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu
new file mode 100644
index 00000000000..ca2009d3c74
--- /dev/null
+++ b/cpp/src/io/parquet/reader_impl_preprocess.cu
@@ -0,0 +1,814 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "reader_impl.hpp"
+
+#include <io/comp/nvcomp_adapter.hpp>
+#include <io/utilities/config_utils.hpp>
+#include <io/utilities/time_utils.cuh>
+
+#include <cudf/detail/utilities/vector_factories.hpp>
+
+#include <rmm/exec_policy.hpp>
+
+#include <thrust/fill.h>
+#include <thrust/logical.h>
+
+#include <numeric>
+
+namespace cudf::io::detail::parquet {
+
+namespace {
+
+/**
+ * @brief Generate depth remappings for repetition and definition levels.
+ *
+ * When dealing with columns that contain lists, we must examine incoming repetition and
+ * definition level pairs to determine what range of output nesting is indicated when adding
+ * new values.  This function generates the mappings of the R/D levels to those start/end
+ * bounds. It does nothing if `remap` already holds an entry for `src_col_schema`.
+ *
+ * @param remap Maps column schema index to the R/D remapping vectors for that column
+ * @param src_col_schema The column schema to generate the new mapping for
+ * @param md File metadata information
+ */
+void generate_depth_remappings(std::map<int, std::pair<std::vector<int>, std::vector<int>>>& remap,
+                               int src_col_schema,
+                               aggregate_reader_metadata const& md)
+{
+  // Claim the map entry for this schema with a single lookup (try_emplace inserts an empty
+  // pair of vectors only when the key is absent). If the entry already exists, the remapping
+  // was generated by an earlier call and there is nothing left to do.
+  auto const [entry, is_new] = remap.try_emplace(src_col_schema);
+  if (not is_new) { return; }
+  auto& depth_remap = entry->second;
+
+  // bind by reference to avoid copying the schema element
+  auto const& schema  = md.get_schema(src_col_schema);
+  int const max_depth = md.get_output_nesting_depth(src_col_schema);
+
+  std::vector<int>& rep_depth_remap = (depth_remap.first);
+  rep_depth_remap.resize(schema.max_repetition_level + 1);
+  std::vector<int>& def_depth_remap = (depth_remap.second);
+  def_depth_remap.resize(schema.max_definition_level + 1);
+
+  // the key:
+  // for incoming level values  R/D
+  // add values starting at the shallowest nesting level X that has repetition level R
+  // until you reach the deepest nesting level Y that corresponds to the repetition level R1
+  // held by the nesting level that has definition level D
+  //
+  // Example: a 3 level struct with a list at the bottom
+  //
+  //                     R / D   Depth
+  // level0              0 / 1     0
+  //   level1            0 / 2     1
+  //     level2          0 / 3     2
+  //       list          0 / 3     3
+  //         element     1 / 4     4
+  //
+  // incoming R/D : 0, 0  -> add values from depth 0 to 3   (def level 0 always maps to depth 0)
+  // incoming R/D : 0, 1  -> add values from depth 0 to 3
+  // incoming R/D : 0, 2  -> add values from depth 0 to 3
+  // incoming R/D : 1, 4  -> add values from depth 4 to 4
+  //
+  // Note : the -validity- of values is simply checked by comparing the incoming D value against the
+  // D value of the given nesting level (incoming D >= the D for the nesting level == valid,
+  // otherwise NULL).  The tricky part is determining what nesting levels to add values at.
+  //
+  // For schemas with no repetition level (no lists), X is always 0 and Y is always max nesting
+  // depth.
+  //
+
+  // compute "X" from above
+  for (int s_idx = schema.max_repetition_level; s_idx >= 0; s_idx--) {
+    auto find_shallowest = [&](int r) {
+      int shallowest = -1;
+      int cur_depth  = max_depth - 1;
+      int schema_idx = src_col_schema;
+      while (schema_idx > 0) {
+        auto cur_schema = md.get_schema(schema_idx);
+        if (cur_schema.max_repetition_level == r) {
+          // if this is a repeated field, map it one level deeper
+          shallowest = cur_schema.is_stub() ? cur_depth + 1 : cur_depth;
+        }
+        // if it's one-level encoding list
+        else if (cur_schema.is_one_level_list()) {
+          shallowest = cur_depth - 1;
+        }
+        if (!cur_schema.is_stub()) { cur_depth--; }
+        schema_idx = cur_schema.parent_idx;
+      }
+      return shallowest;
+    };
+    rep_depth_remap[s_idx] = find_shallowest(s_idx);
+  }
+
+  // compute "Y" from above
+  for (int s_idx = schema.max_definition_level; s_idx >= 0; s_idx--) {
+    auto find_deepest = [&](int d) {
+      SchemaElement prev_schema;
+      int schema_idx = src_col_schema;
+      int r1         = 0;
+      while (schema_idx > 0) {
+        SchemaElement cur_schema = md.get_schema(schema_idx);
+        if (cur_schema.max_definition_level == d) {
+          // a stub takes the repetition level of the schema visited just before it (its child)
+          r1 = cur_schema.is_stub() ? prev_schema.max_repetition_level
+                                    : cur_schema.max_repetition_level;
+          break;
+        }
+        prev_schema = cur_schema;
+        schema_idx  = cur_schema.parent_idx;
+      }
+
+      // we now know R1 from above. return the deepest nesting level that has the
+      // same repetition level
+      schema_idx = src_col_schema;
+      int depth  = max_depth - 1;
+      while (schema_idx > 0) {
+        SchemaElement cur_schema = md.get_schema(schema_idx);
+        if (cur_schema.max_repetition_level == r1) {
+          // if this is a repeated field, map it one level deeper
+          depth = cur_schema.is_stub() ? depth + 1 : depth;
+          break;
+        }
+        if (!cur_schema.is_stub()) { depth--; }
+        prev_schema = cur_schema;
+        schema_idx  = cur_schema.parent_idx;
+      }
+      return depth;
+    };
+    def_depth_remap[s_idx] = find_deepest(s_idx);
+  }
+}
+
+/**
+ * @brief Returns the number of bits required to store `max_level`, converted to type `T`
+ */
+template <typename T = uint8_t>
+[[nodiscard]] T required_bits(uint32_t max_level)
+{
+  return static_cast<T>(CompactProtocolReader::NumRequiredBits(max_level));
+}
+
+/**
+ * @brief Derives the Parquet decoding parameters of a column from its cuDF and Parquet types.
+ *
+ * @return A tuple of Parquet type width, Parquet clock rate and Parquet decimal type.
+ */
+[[nodiscard]] std::tuple<int32_t, int32_t, int8_t> conversion_info(type_id column_type_id,
+                                                                   type_id timestamp_type_id,
+                                                                   parquet::Type physical,
+                                                                   int8_t converted,
+                                                                   int32_t length)
+{
+  int32_t width = 0;
+  int32_t rate  = 0;
+  if (physical == parquet::FIXED_LEN_BYTE_ARRAY) { width = length; }
+
+  switch (column_type_id) {
+    case type_id::INT8:
+    case type_id::UINT8: width = 1; break;  // I32 -> I8
+    case type_id::INT16:
+    case type_id::UINT16: width = 2; break;  // I32 -> I16
+    case type_id::INT32: width = 4; break;  // str -> hash32
+    default:
+      if (is_chrono(data_type{column_type_id})) { rate = to_clockrate(timestamp_type_id); }
+  }
+
+  // a DECIMAL annotation is dropped when not converting to float64 or decimal
+  bool const keep = converted != parquet::DECIMAL or column_type_id == type_id::FLOAT64 or
+                    cudf::is_fixed_point(data_type{column_type_id});
+  return std::make_tuple(width, rate, keep ? converted : static_cast<int8_t>(parquet::UNKNOWN));
+}
+
+/**
+ * @brief Reads compressed page data to device memory, one read per run of adjacent chunks
+ *
+ * @param sources Dataset sources
+ * @param page_data Buffers to hold compressed page data for each chunk
+ * @param chunks List of column chunk descriptors
+ * @param begin_chunk Index of first column chunk to read
+ * @param end_chunk Index after the last column chunk to read
+ * @param column_chunk_offsets File offset for all chunks
+ * @param chunk_source_map Association between each column chunk and its source
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ *
+ * @return A future object for reading synchronization
+ */
+[[nodiscard]] std::future<void> read_column_chunks_async(
+  std::vector<std::unique_ptr<datasource>> const& sources,
+  std::vector<std::unique_ptr<datasource::buffer>>& page_data,
+  hostdevice_vector<gpu::ColumnChunkDesc>& chunks,
+  size_t begin_chunk,
+  size_t end_chunk,
+  const std::vector<size_t>& column_chunk_offsets,
+  std::vector<size_type> const& chunk_source_map,
+  rmm::cuda_stream_view stream)
+{
+  // Issue one read per run of adjacent chunks, remembering the asynchronous device reads
+  std::vector<std::future<size_t>> pending_reads;
+  size_t chunk = begin_chunk;
+  while (chunk < end_chunk) {
+    auto const run_offset     = column_chunk_offsets[chunk];
+    auto const run_compressed = (chunks[chunk].codec != parquet::Compression::UNCOMPRESSED);
+    size_t run_size           = chunks[chunk].compressed_size;
+    size_t run_end            = chunk + 1;
+    for (; run_end < end_chunk; ++run_end) {
+      // Can't merge if not contiguous or mixing compressed and uncompressed
+      // Not coalescing uncompressed with compressed chunks is so that compressed buffers can be
+      // freed earlier (immediately after decompression stage) to limit peak memory requirements
+      auto const next_compressed =
+        (chunks[run_end].codec != parquet::Compression::UNCOMPRESSED);
+      auto const is_contiguous = (column_chunk_offsets[run_end] == run_offset + run_size);
+      if (not is_contiguous or next_compressed != run_compressed) { break; }
+      run_size += chunks[run_end].compressed_size;
+    }
+    // nothing to transfer for this run: skip it
+    if (run_size == 0) {
+      chunk = run_end;
+      continue;
+    }
+    auto& source = sources[chunk_source_map[chunk]];
+    if (source->is_device_read_preferred(run_size)) {
+      // read directly into a device buffer; the returned future is waited on later
+      auto d_buffer = rmm::device_buffer(run_size, stream);
+      pending_reads.emplace_back(source->device_read_async(
+        run_offset, run_size, static_cast<uint8_t*>(d_buffer.data()), stream));
+      page_data[chunk] = datasource::buffer::create(std::move(d_buffer));
+    } else {
+      // read through the host, then build the device buffer from the host bytes
+      auto const h_buffer = source->host_read(run_offset, run_size);
+      page_data[chunk]    = datasource::buffer::create(
+        rmm::device_buffer(h_buffer->data(), h_buffer->size(), stream));
+    }
+    // all chunks of the run point into the one buffer stored at the run's first chunk
+    auto d_compdata = page_data[chunk]->data();
+    for (; chunk != run_end; ++chunk) {
+      chunks[chunk].compressed_data = d_compdata;
+      d_compdata += chunks[chunk].compressed_size;
+    }
+  }
+  // the deferred task owns the futures and waits for all of them when it is itself waited on
+  auto wait_all = [](decltype(pending_reads) tasks) {
+    for (auto& task : tasks) { task.wait(); }
+  };
+  return std::async(std::launch::deferred, wait_all, std::move(pending_reads));
+}
+
+/**
+ * @brief Counts the pages (data and dictionary) contained in the given column chunks.
+ *
+ * @param chunks List of column chunk descriptors
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ *
+ * @return The total number of pages
+ */
+[[nodiscard]] size_t count_page_headers(hostdevice_vector<gpu::ColumnChunkDesc>& chunks,
+                                        rmm::cuda_stream_view stream)
+{
+  // decode the headers on the device, then bring the updated chunk descriptors back to the host
+  chunks.host_to_device(stream);
+  gpu::DecodePageHeaders(chunks.device_ptr(), chunks.size(), stream);
+  chunks.device_to_host(stream, true);
+
+  // add up the data and dictionary pages of all chunks
+  auto const add_pages = [](size_t sum, auto const& chunk) {
+    return sum + (chunk.num_data_pages + chunk.num_dict_pages);
+  };
+  return std::accumulate(
+    chunks.host_ptr(), chunks.host_ptr() + chunks.size(), size_t{0}, add_pages);
+}
+
+/**
+ * @brief Assigns each chunk its slice of `pages` and decodes the page headers into it.
+ *
+ * @param chunks List of column chunk descriptors
+ * @param pages List of page information
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ */
+void decode_page_headers(hostdevice_vector<gpu::ColumnChunkDesc>& chunks,
+                         hostdevice_vector<gpu::PageInfo>& pages,
+                         rmm::cuda_stream_view stream)
+{
+  // IMPORTANT : if you change how pages are stored within a chunk (dict pages, then data pages),
+  // please update preprocess_nested_columns to reflect this.
+  for (size_t c = 0, first_page = 0; c < chunks.size(); c++) {
+    auto& chunk         = chunks[c];
+    chunk.max_num_pages = chunk.num_data_pages + chunk.num_dict_pages;
+    chunk.page_info     = pages.device_ptr(first_page);
+    first_page += chunk.max_num_pages;
+  }
+  chunks.host_to_device(stream);
+  gpu::DecodePageHeaders(chunks.device_ptr(), chunks.size(), stream);
+  pages.device_to_host(stream, true);
+}
+
+/**
+ * @brief Decompresses the page data, at page granularity.
+ *
+ * @param chunks List of column chunk descriptors
+ * @param pages List of page information; `page_data` of every page of a compressed chunk is
+ * repointed into the returned buffer, and the list is then copied to the device
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @return Device buffer to decompressed page data
+ */
+[[nodiscard]] rmm::device_buffer decompress_page_data(
+  hostdevice_vector<gpu::ColumnChunkDesc>& chunks,
+  hostdevice_vector<gpu::PageInfo>& pages,
+  rmm::cuda_stream_view stream)
+{
+  auto for_each_codec_page = [&](parquet::Compression codec, const std::function<void(size_t)>& f) {
+    for (size_t c = 0, page_count = 0; c < chunks.size(); c++) {
+      const auto page_stride = chunks[c].max_num_pages;
+      if (chunks[c].codec == codec) {
+        for (int k = 0; k < page_stride; k++) {
+          f(page_count + k);
+        }
+      }
+      page_count += page_stride;
+    }
+  };
+
+  // Brotli scratch memory for decompressing
+  rmm::device_buffer debrotli_scratch;
+
+  // Total number of compressed pages and total size of their decompressed data
+  size_t num_comp_pages    = 0;
+  size_t total_decomp_size = 0;
+
+  struct codec_stats {
+    parquet::Compression compression_type = UNCOMPRESSED;
+    size_t num_pages                      = 0;
+    int32_t max_decompressed_size         = 0;
+    size_t total_decomp_size              = 0;
+  };
+
+  std::array codecs{codec_stats{parquet::GZIP},
+                    codec_stats{parquet::SNAPPY},
+                    codec_stats{parquet::BROTLI},
+                    codec_stats{parquet::ZSTD}};
+  // a chunk is supported when it is uncompressed or uses one of the codecs listed above
+  auto is_codec_supported = [&codecs](int8_t codec) {
+    if (codec == parquet::UNCOMPRESSED) return true;
+    return std::find_if(codecs.begin(), codecs.end(), [codec](auto& cstats) {
+             return codec == cstats.compression_type;
+           }) != codecs.end();
+  };
+  CUDF_EXPECTS(std::all_of(chunks.begin(),
+                           chunks.end(),
+                           [&is_codec_supported](auto const& chunk) {
+                             return is_codec_supported(chunk.codec);
+                           }),
+               "Unsupported compression type");
+
+  for (auto& codec : codecs) {
+    for_each_codec_page(codec.compression_type, [&](size_t page) {
+      auto page_uncomp_size = pages[page].uncompressed_page_size;
+      total_decomp_size += page_uncomp_size;
+      codec.total_decomp_size += page_uncomp_size;
+      codec.max_decompressed_size = std::max(codec.max_decompressed_size, page_uncomp_size);
+      codec.num_pages++;
+      num_comp_pages++;
+    });
+    if (codec.compression_type == parquet::BROTLI && codec.num_pages > 0) {
+      debrotli_scratch.resize(get_gpu_debrotli_scratch_size(codec.num_pages), stream);
+    }
+  }
+
+  // Single output buffer for all decompressed pages; pages are laid out codec by codec
+  rmm::device_buffer decomp_pages(total_decomp_size, stream);
+
+  std::vector<device_span<uint8_t const>> comp_in;
+  comp_in.reserve(num_comp_pages);
+  std::vector<device_span<uint8_t>> comp_out;
+  comp_out.reserve(num_comp_pages);
+
+  // vectors to save v2 def and rep level data, if any
+  std::vector<device_span<uint8_t const>> copy_in;
+  copy_in.reserve(num_comp_pages);
+  std::vector<device_span<uint8_t>> copy_out;
+  copy_out.reserve(num_comp_pages);
+  // one result per compressed page, preset to FAILURE so that unprocessed entries are caught
+  rmm::device_uvector<compression_result> comp_res(num_comp_pages, stream);
+  thrust::fill(rmm::exec_policy(stream),
+               comp_res.begin(),
+               comp_res.end(),
+               compression_result{0, compression_status::FAILURE});
+  // start_pos is the index of the current codec's first page in comp_in/comp_out/comp_res
+  size_t decomp_offset = 0;
+  int32_t start_pos    = 0;
+  for (const auto& codec : codecs) {
+    if (codec.num_pages == 0) { continue; }
+
+    for_each_codec_page(codec.compression_type, [&](size_t page_idx) {
+      auto const dst_base = static_cast<uint8_t*>(decomp_pages.data()) + decomp_offset;
+      auto& page          = pages[page_idx];
+      // offset will only be non-zero for V2 pages
+      auto const offset = page.def_lvl_bytes + page.rep_lvl_bytes;
+      // for V2 need to copy def and rep level info into place, and then offset the
+      // input and output buffers. otherwise we'd have to keep both the compressed
+      // and decompressed data.
+      if (offset != 0) {
+        copy_in.emplace_back(page.page_data, offset);
+        copy_out.emplace_back(dst_base, offset);
+      }
+      comp_in.emplace_back(page.page_data + offset,
+                           static_cast<size_t>(page.compressed_page_size - offset));
+      comp_out.emplace_back(dst_base + offset,
+                            static_cast<size_t>(page.uncompressed_page_size - offset));
+      page.page_data = dst_base;
+      decomp_offset += page.uncompressed_page_size;
+    });
+
+    host_span<device_span<uint8_t const> const> comp_in_view{comp_in.data() + start_pos,
+                                                             codec.num_pages};
+    auto const d_comp_in = cudf::detail::make_device_uvector_async(comp_in_view, stream);
+    host_span<device_span<uint8_t> const> comp_out_view(comp_out.data() + start_pos,
+                                                        codec.num_pages);
+    auto const d_comp_out = cudf::detail::make_device_uvector_async(comp_out_view, stream);
+    device_span<compression_result> d_comp_res_view(comp_res.data() + start_pos, codec.num_pages);
+
+    switch (codec.compression_type) {
+      case parquet::GZIP:
+        gpuinflate(d_comp_in, d_comp_out, d_comp_res_view, gzip_header_included::YES, stream);
+        break;
+      case parquet::SNAPPY:
+        if (nvcomp_integration::is_stable_enabled()) {
+          nvcomp::batched_decompress(nvcomp::compression_type::SNAPPY,
+                                     d_comp_in,
+                                     d_comp_out,
+                                     d_comp_res_view,
+                                     codec.max_decompressed_size,
+                                     codec.total_decomp_size,
+                                     stream);
+        } else {
+          gpu_unsnap(d_comp_in, d_comp_out, d_comp_res_view, stream);
+        }
+        break;
+      case parquet::ZSTD:
+        nvcomp::batched_decompress(nvcomp::compression_type::ZSTD,
+                                   d_comp_in,
+                                   d_comp_out,
+                                   d_comp_res_view,
+                                   codec.max_decompressed_size,
+                                   codec.total_decomp_size,
+                                   stream);
+        break;
+      case parquet::BROTLI:
+        gpu_debrotli(d_comp_in,
+                     d_comp_out,
+                     d_comp_res_view,
+                     debrotli_scratch.data(),
+                     debrotli_scratch.size(),
+                     stream);
+        break;
+      default: CUDF_FAIL("Unexpected decompression dispatch"); break;
+    }
+    start_pos += codec.num_pages;
+  }
+
+  CUDF_EXPECTS(thrust::all_of(rmm::exec_policy(stream),
+                              comp_res.begin(),
+                              comp_res.end(),
+                              [] __device__(auto const& res) {
+                                return res.status == compression_status::SUCCESS;
+                              }),
+               "Error during decompression");
+
+  // now copy the uncompressed V2 def and rep level data
+  if (not copy_in.empty()) {
+    auto const d_copy_in  = cudf::detail::make_device_uvector_async(copy_in, stream);
+    auto const d_copy_out = cudf::detail::make_device_uvector_async(copy_out, stream);
+
+    gpu_copy_uncompressed_blocks(d_copy_in, d_copy_out, stream);
+    stream.synchronize();
+  }
+
+  // Update the page information in device memory with the updated value of
+  // page_data; it now points to the uncompressed data buffer
+  pages.host_to_device(stream);
+
+  return decomp_pages;
+}
+
+}  // namespace
+
+// Allocates the per-page nesting info buffer, links every data page to its slice and fills it in
+void reader::impl::allocate_nesting_info()
+{
+  auto const& chunks      = _file_itm_data.chunks;
+  auto& pages             = _file_itm_data.pages_info;
+  auto& page_nesting_info = _file_itm_data.page_nesting_info;
+
+  // compute total # of page_nesting infos needed and allocate space. doing this in one
+  // buffer to keep it to a single gpu allocation
+  size_t const total_page_nesting_infos = std::accumulate(
+    chunks.host_ptr(), chunks.host_ptr() + chunks.size(), size_t{0}, [&](size_t sum, auto& chunk) {
+      // the schema of the input column
+      auto const& schema                    = _metadata->get_schema(chunk.src_col_schema);
+      auto const per_page_nesting_info_size = std::max(
+        schema.max_definition_level + 1, _metadata->get_output_nesting_depth(chunk.src_col_schema));
+      return sum + (per_page_nesting_info_size * chunk.num_data_pages);
+    });
+
+  page_nesting_info = hostdevice_vector<gpu::PageNestingInfo>{total_page_nesting_infos, _stream};
+
+  // retrieve from the gpu so we can update
+  pages.device_to_host(_stream, true);
+
+  // update pointers in the PageInfos
+  int target_page_index = 0;
+  int src_info_index    = 0;
+  for (size_t idx = 0; idx < chunks.size(); idx++) {
+    int src_col_schema                    = chunks[idx].src_col_schema;
+    auto& schema                          = _metadata->get_schema(src_col_schema);
+    auto const per_page_nesting_info_size = std::max(
+      schema.max_definition_level + 1, _metadata->get_output_nesting_depth(src_col_schema));
+
+    // skip my dict pages
+    target_page_index += chunks[idx].num_dict_pages;
+    for (int p_idx = 0; p_idx < chunks[idx].num_data_pages; p_idx++) {
+      pages[target_page_index + p_idx].nesting = page_nesting_info.device_ptr() + src_info_index;
+      pages[target_page_index + p_idx].num_nesting_levels = per_page_nesting_info_size;
+
+      src_info_index += per_page_nesting_info_size;
+    }
+    target_page_index += chunks[idx].num_data_pages;
+  }
+
+  // copy back to the gpu
+  pages.host_to_device(_stream);
+
+  // fill in
+  int nesting_info_index = 0;
+  std::map<int, std::pair<std::vector<int>, std::vector<int>>> depth_remapping;
+  for (size_t idx = 0; idx < chunks.size(); idx++) {
+    int src_col_schema = chunks[idx].src_col_schema;
+
+    // schema of the input column
+    auto& schema = _metadata->get_schema(src_col_schema);
+    // real depth of the output cudf column hierarchy (1 == no nesting, 2 == 1 level, etc)
+    int max_depth = _metadata->get_output_nesting_depth(src_col_schema);
+
+    // # of nesting infos stored per page for this column
+    auto const per_page_nesting_info_size = std::max(schema.max_definition_level + 1, max_depth);
+
+    // if this column has lists, generate its depth remapping. the map declared before the loop
+    // is shared by all chunks, so the remapping of a schema index is only generated once
+    if (schema.max_repetition_level > 0) {
+      generate_depth_remappings(depth_remapping, src_col_schema, *_metadata);
+    }
+
+    // fill in host-side nesting info
+    int schema_idx  = src_col_schema;
+    auto cur_schema = _metadata->get_schema(schema_idx);
+    int cur_depth   = max_depth - 1;
+    while (schema_idx > 0) {
+      // stub columns (basically the inner field of a list scheme element) are not real columns.
+      // we can ignore them for the purposes of output nesting info
+      if (!cur_schema.is_stub()) {
+        // initialize each page within the chunk
+        for (int p_idx = 0; p_idx < chunks[idx].num_data_pages; p_idx++) {
+          gpu::PageNestingInfo* pni =
+            &page_nesting_info[nesting_info_index + (p_idx * per_page_nesting_info_size)];
+
+          // if we have lists, set our start and end depth remappings
+          if (schema.max_repetition_level > 0) {
+            auto remap = depth_remapping.find(src_col_schema);
+            CUDF_EXPECTS(remap != depth_remapping.end(),
+                         "Could not find depth remapping for schema");
+            auto const& [rep_depth_remap, def_depth_remap] = remap->second;
+
+            for (size_t m = 0; m < rep_depth_remap.size(); m++) {
+              pni[m].start_depth = rep_depth_remap[m];
+            }
+            for (size_t m = 0; m < def_depth_remap.size(); m++) {
+              pni[m].end_depth = def_depth_remap[m];
+            }
+          }
+
+          // values indexed by output column index
+          pni[cur_depth].max_def_level = cur_schema.max_definition_level;
+          pni[cur_depth].max_rep_level = cur_schema.max_repetition_level;
+          pni[cur_depth].size          = 0;
+        }
+
+        // move up the hierarchy
+        cur_depth--;
+      }
+
+      // next schema
+      schema_idx = cur_schema.parent_idx;
+      cur_schema = _metadata->get_schema(schema_idx);
+    }
+
+    nesting_info_index += (per_page_nesting_info_size * chunks[idx].num_data_pages);
+  }
+
+  // copy nesting info to the device
+  page_nesting_info.host_to_device(_stream);
+}
+
+// Reads the column chunks of the selected row groups into device memory, decodes their page
+// headers, decompresses the compressed chunks and sets up the per-page nesting information.
+// The results are stored in `_file_itm_data`.
+void reader::impl::load_and_decompress_data(std::vector<row_group_info> const& row_groups_info,
+                                            size_type num_rows)
+{
+  // This function should never be called if `num_rows == 0`.
+  CUDF_EXPECTS(num_rows > 0, "Number of reading rows must not be zero.");
+
+  auto& raw_page_data    = _file_itm_data.raw_page_data;
+  auto& decomp_page_data = _file_itm_data.decomp_page_data;
+  auto& chunks           = _file_itm_data.chunks;
+  auto& pages_info       = _file_itm_data.pages_info;
+
+  // Descriptors for all the chunks that make up the selected columns
+  const auto num_input_columns = _input_columns.size();
+  const auto num_chunks        = row_groups_info.size() * num_input_columns;
+  chunks                       = hostdevice_vector<gpu::ColumnChunkDesc>(0, num_chunks, _stream);
+
+  // Association between each column chunk and its source
+  std::vector<size_type> chunk_source_map(num_chunks);
+
+  // Tracker for eventually deallocating compressed and uncompressed data
+  raw_page_data = std::vector<std::unique_ptr<datasource::buffer>>(num_chunks);
+
+  // Keep track of column chunk file offsets
+  std::vector<size_t> column_chunk_offsets(num_chunks);
+
+  // Initialize column chunk information
+  size_t total_decompressed_size = 0;
+  auto remaining_rows            = num_rows;
+  std::vector<std::future<void>> read_rowgroup_tasks;
+  for (const auto& rg : row_groups_info) {
+    const auto& row_group       = _metadata->get_row_group(rg.index, rg.source_index);
+    auto const row_group_start  = rg.start_row;
+    auto const row_group_source = rg.source_index;
+    auto const row_group_rows   = std::min<int>(remaining_rows, row_group.num_rows);
+    auto const io_chunk_idx     = chunks.size();
+
+    // generate ColumnChunkDesc objects for everything to be decoded (all input columns)
+    for (size_t i = 0; i < num_input_columns; ++i) {
+      auto const& col = _input_columns[i];
+      // look up metadata
+      auto& col_meta = _metadata->get_column_metadata(rg.index, rg.source_index, col.schema_idx);
+      auto& schema   = _metadata->get_schema(col.schema_idx);
+
+      auto [type_width, clock_rate, converted_type] =
+        conversion_info(to_type_id(schema, _strings_to_categorical, _timestamp_type.id()),
+                        _timestamp_type.id(),
+                        schema.type,
+                        schema.converted_type,
+                        schema.type_length);
+
+      column_chunk_offsets[chunks.size()] =
+        (col_meta.dictionary_page_offset != 0)
+          ? std::min(col_meta.data_page_offset, col_meta.dictionary_page_offset)
+          : col_meta.data_page_offset;
+
+      chunks.push_back(gpu::ColumnChunkDesc(col_meta.total_compressed_size,
+                                            nullptr,
+                                            col_meta.num_values,
+                                            schema.type,
+                                            type_width,
+                                            row_group_start,
+                                            row_group_rows,
+                                            schema.max_definition_level,
+                                            schema.max_repetition_level,
+                                            _metadata->get_output_nesting_depth(col.schema_idx),
+                                            required_bits(schema.max_definition_level),
+                                            required_bits(schema.max_repetition_level),
+                                            col_meta.codec,
+                                            converted_type,
+                                            schema.logical_type,
+                                            schema.decimal_scale,
+                                            clock_rate,
+                                            i,
+                                            col.schema_idx));
+
+      // Map each column chunk to its column index and its source index
+      chunk_source_map[chunks.size() - 1] = row_group_source;
+
+      if (col_meta.codec != Compression::UNCOMPRESSED) {
+        total_decompressed_size += col_meta.total_uncompressed_size;
+      }
+    }
+    // Read compressed chunk data to device memory
+    read_rowgroup_tasks.push_back(read_column_chunks_async(_sources,
+                                                           raw_page_data,
+                                                           chunks,
+                                                           io_chunk_idx,
+                                                           chunks.size(),
+                                                           column_chunk_offsets,
+                                                           chunk_source_map,
+                                                           _stream));
+
+    remaining_rows -= row_group.num_rows;
+  }
+  for (auto& task : read_rowgroup_tasks) {
+    task.wait();
+  }
+  CUDF_EXPECTS(remaining_rows <= 0, "All rows data must be read.");
+
+  // Process dataset chunk pages into output columns
+  auto const total_pages = count_page_headers(chunks, _stream);
+  pages_info             = hostdevice_vector<gpu::PageInfo>(total_pages, total_pages, _stream);
+
+  if (total_pages > 0) {
+    // decoding of column/page information
+    decode_page_headers(chunks, pages_info, _stream);
+    if (total_decompressed_size > 0) {
+      decomp_page_data = decompress_page_data(chunks, pages_info, _stream);
+      // Free compressed data
+      for (size_t c = 0; c < chunks.size(); c++) {
+        if (chunks[c].codec != parquet::Compression::UNCOMPRESSED) {
+          raw_page_data[c].reset();
+          // note: coalesced reads leave some entries empty; resetting those is a no-op
+        }
+      }
+    }
+
+    // NOTE: the chunks above only cover the input columns; there is not necessarily a 1:1
+    // mapping between input columns and output columns. For example, parquet does not
+    // explicitly store a ColumnChunkDesc for struct columns. The "structiness" is simply
+    // implied by the schema.  For example, this schema:
+    //  required group field_id=1 name {
+    //    required binary field_id=2 firstname (String);
+    //    required binary field_id=3 middlename (String);
+    //    required binary field_id=4 lastname (String);
+    // }
+    // will only contain 3 columns of data (firstname, middlename, lastname), while "name" is a
+    // struct column that is still returned. The output buffers are not built in this function.
+
+    // nesting information (sizes, etc) stored -per page-
+    // note : even for flat schemas, we allocate 1 level of "nesting" info
+    allocate_nesting_info();
+  }
+}
+
+void reader::impl::allocate_columns(size_t min_row, size_t total_rows, bool uses_custom_row_bounds)
+{
+  auto const& chunks = _file_itm_data.chunks;
+  auto& pages        = _file_itm_data.pages_info;
+
+  // Walk every input column from its top-level output buffer down to its leaf. Buffers that
+  // are not below a list are allocated right away with `total_rows` elements; buffers below a
+  // list cannot be sized yet, so their presence is only recorded.
+  bool needs_list_preprocess = false;
+  for (auto const& input_col : _input_columns) {
+    auto* level_buffers = &_output_buffers;
+    size_t const depth  = input_col.nesting_depth();
+    for (size_t level = 0; level < depth; level++) {
+      auto& out_buf = (*level_buffers)[input_col.nesting[level]];
+      level_buffers = &out_buf.children;
+
+      // if this has a list parent, we will have to do further work in gpu::PreprocessColumnData
+      // to know how big this buffer actually is.
+      if (out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) {
+        needs_list_preprocess = true;
+        continue;
+      }
+
+      // a non-zero size means this buffer was already processed because it is part of a struct
+      // hierarchy shared with a previous input column
+      if (out_buf.size != 0) { continue; }
+
+      // add 1 for the offset if this is a list column
+      bool const is_list = out_buf.type.id() == type_id::LIST;
+      out_buf.create(is_list ? total_rows + 1 : total_rows, _stream, _mr);
+    }
+  }
+
+  // if no column contains lists, no further preprocessing is necessary.
+  if (not needs_list_preprocess) { return; }
+
+  // work out the sizes of the buffers that live below lists
+  gpu::PreprocessColumnData(pages,
+                            chunks,
+                            _input_columns,
+                            _output_buffers,
+                            total_rows,
+                            min_row,
+                            uses_custom_row_bounds,
+                            _stream,
+                            _mr);
+  _stream.synchronize();
+}
+
+}  // namespace cudf::io::detail::parquet
diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu
index a49dbcc703c..090c275fcbc 100644
--- a/cpp/src/io/parquet/writer_impl.cu
+++ b/cpp/src/io/parquet/writer_impl.cu
@@ -19,6 +19,7 @@
  * @brief cuDF-IO parquet writer class implementation
  */
 
+#include "parquet_gpu.cuh"
 #include "writer_impl.hpp"
 
 #include "compact_protocol_reader.hpp"

From f87d2b4dbe0664d8521e2b754dcbff1e9208b8e1 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Thu, 10 Nov 2022 18:45:28 -0600
Subject: [PATCH 156/202] Add symlinks to notebooks. (#12128)

Adds symlinks to notebooks from the user guide as requested by @taureandyernv.

Going forward, new notebooks added to the user guide directory should also be symlinked in `/notebooks`.

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/12128
---
 notebooks/10min.ipynb         | 1 +
 notebooks/cupy-interop.ipynb  | 1 +
 notebooks/guide-to-udfs.ipynb | 1 +
 notebooks/missing-data.ipynb  | 1 +
 4 files changed, 4 insertions(+)
 create mode 120000 notebooks/10min.ipynb
 create mode 120000 notebooks/cupy-interop.ipynb
 create mode 120000 notebooks/guide-to-udfs.ipynb
 create mode 120000 notebooks/missing-data.ipynb

diff --git a/notebooks/10min.ipynb b/notebooks/10min.ipynb
new file mode 120000
index 00000000000..bd57fc7375e
--- /dev/null
+++ b/notebooks/10min.ipynb
@@ -0,0 +1 @@
+../docs/cudf/source/user_guide/10min.ipynb
\ No newline at end of file
diff --git a/notebooks/cupy-interop.ipynb b/notebooks/cupy-interop.ipynb
new file mode 120000
index 00000000000..0ba88107fc5
--- /dev/null
+++ b/notebooks/cupy-interop.ipynb
@@ -0,0 +1 @@
+../docs/cudf/source/user_guide/cupy-interop.ipynb
\ No newline at end of file
diff --git a/notebooks/guide-to-udfs.ipynb b/notebooks/guide-to-udfs.ipynb
new file mode 120000
index 00000000000..a4bbe597fee
--- /dev/null
+++ b/notebooks/guide-to-udfs.ipynb
@@ -0,0 +1 @@
+../docs/cudf/source/user_guide/guide-to-udfs.ipynb
\ No newline at end of file
diff --git a/notebooks/missing-data.ipynb b/notebooks/missing-data.ipynb
new file mode 120000
index 00000000000..7e3b01ae0b3
--- /dev/null
+++ b/notebooks/missing-data.ipynb
@@ -0,0 +1 @@
+../docs/cudf/source/user_guide/missing-data.ipynb
\ No newline at end of file

From 3894427ecd6b6682eeb2d6c542667dea00fa5e6e Mon Sep 17 00:00:00 2001
From: Liangcai Li <firestarmanllc@gmail.com>
Date: Fri, 11 Nov 2022 13:10:51 +0800
Subject: [PATCH 157/202] Add JNI for `substring` without 'end' parameter.
 (#12113)

Authors:
  - Liangcai Li (https://github.com/firestarman)

Approvers:
  - Robert (Bobby) Evans (https://github.com/revans2)
  - Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/12113
---
 java/src/main/java/ai/rapids/cudf/ColumnView.java | 14 +++++++++++---
 java/src/main/native/src/ColumnViewJni.cpp        | 15 +++++++++++++--
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/java/src/main/java/ai/rapids/cudf/ColumnView.java b/java/src/main/java/ai/rapids/cudf/ColumnView.java
index e639320b028..57849b9ba0a 100644
--- a/java/src/main/java/ai/rapids/cudf/ColumnView.java
+++ b/java/src/main/java/ai/rapids/cudf/ColumnView.java
@@ -2632,12 +2632,13 @@ public final ColumnVector stringSplitRecord(String delimiter) {
 
   /**
    * Returns a new strings column that contains substrings of the strings in the provided column.
-   * Overloading subString to support if end index is not provided. Appending -1 to indicate to
-   * read until end of string.
+   * The character positions to retrieve in each string are `[start, <the string end>)`.
+   *
    * @param start first character index to begin the substring(inclusive).
    */
   public final ColumnVector substring(int start) {
-    return substring(start, -1);
+    assert type.equals(DType.STRING) : "column type must be a String";
+    return new ColumnVector(substringS(getNativeView(), start));
   }
 
   /**
@@ -3983,6 +3984,13 @@ private static native long stringSplitRecord(long nativeHandle, String pattern,
    */
   private static native long substring(long columnView, int start, int end) throws CudfException;
 
+  /**
+   * Native method to extract substrings from a given strings column.
+   * @param columnView native handle of the cudf::column_view being operated on.
+   * @param start      first character index to begin the substrings (inclusive).
+   */
+  private static native long substringS(long columnView, int start) throws CudfException;
+
   /**
    * Native method to calculate substring from a given string column.
    * @param columnView native handle of the cudf::column_view being operated on.
diff --git a/java/src/main/native/src/ColumnViewJni.cpp b/java/src/main/native/src/ColumnViewJni.cpp
index 9d442772261..4acc14c760c 100644
--- a/java/src/main/native/src/ColumnViewJni.cpp
+++ b/java/src/main/native/src/ColumnViewJni.cpp
@@ -1397,6 +1397,18 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_binaryOpVS(JNIEnv *env, j
   CATCH_STD(env, 0);
 }
 
+JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_substringS(JNIEnv *env, jclass,
+                                                                  jlong cv_handle, jint start) {
+  JNI_NULL_CHECK(env, cv_handle, "column is null", 0);
+  try {
+    cudf::jni::auto_set_device(env);
+    auto const cv = reinterpret_cast<cudf::column_view const *>(cv_handle);
+    auto const scv = cudf::strings_column_view{*cv};
+    return release_as_jlong(cudf::strings::slice_strings(scv, start));
+  }
+  CATCH_STD(env, 0);
+}
+
 JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_substring(JNIEnv *env, jclass,
                                                                  jlong column_view, jint start,
                                                                  jint end) {
@@ -1405,8 +1417,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ColumnView_substring(JNIEnv *env, jc
     cudf::jni::auto_set_device(env);
     cudf::column_view *cv = reinterpret_cast<cudf::column_view *>(column_view);
     cudf::strings_column_view scv(*cv);
-    return release_as_jlong((end == -1 ? cudf::strings::slice_strings(scv, start) :
-                                         cudf::strings::slice_strings(scv, start, end)));
+    return release_as_jlong(cudf::strings::slice_strings(scv, start, end));
   }
   CATCH_STD(env, 0);
 }

From d335aa3dcf6a3c55dc7dbd95b94ca8e4b409daee Mon Sep 17 00:00:00 2001
From: Vukasin Milovanovic <vmilovanovic@nvidia.com>
Date: Thu, 10 Nov 2022 22:40:30 -0800
Subject: [PATCH 158/202] Fix alignment of compressed blocks in ORC writer
 (#12077)

Closes #11812
Fixed alignment of compressed blocks in ORC writer - impacted ZLIB compression.
Re-enabled nvCOMP DEFLATE compression in ORC - nvCOMP 2.5+ only.

Refactored the nvCOMP feature status(enabled/disabled in cuDF) checks to include reason why features are not enabled (if not enabled).
Refactored call sites to return the detailed error message if an operation fails because of nvCOMP integration config.
Refactored nvCOMP adapter macros to allow mocking of the parameters that determine if an nvCOMP feature is enabled (env var, GPU compute capability, nvCOMP version).
Added tests to verify the logic of the newly refactored feature status checks (allowed by the mocking above).
Fix a Parquet test that was calling ORC reader/writer :grimacing:

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - Jim Brennan (https://github.com/jbrennan333)
  - Mike Wilson (https://github.com/hyperbolic2346)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/12077
---
 cpp/src/io/comp/nvcomp_adapter.cpp         | 227 +++++++++++++--------
 cpp/src/io/comp/nvcomp_adapter.hpp         |  48 ++++-
 cpp/src/io/orc/reader_impl.cu              |  23 ++-
 cpp/src/io/orc/stripe_enc.cu               |  20 +-
 cpp/src/io/orc/writer_impl.cu              |  21 +-
 cpp/src/io/parquet/writer_impl.cu          |  25 ++-
 cpp/src/io/text/bgzip_data_chunk_source.cu |  14 +-
 cpp/tests/io/comp/decomp_test.cpp          |  58 ++++++
 python/cudf/cudf/tests/test_parquet.py     |   4 +-
 9 files changed, 306 insertions(+), 134 deletions(-)

diff --git a/cpp/src/io/comp/nvcomp_adapter.cpp b/cpp/src/io/comp/nvcomp_adapter.cpp
index 20912e9209f..fd0cbeced3a 100644
--- a/cpp/src/io/comp/nvcomp_adapter.cpp
+++ b/cpp/src/io/comp/nvcomp_adapter.cpp
@@ -31,46 +31,23 @@
 #include NVCOMP_ZSTD_HEADER
 #endif
 
-#if NVCOMP_MAJOR_VERSION > 2 or (NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION >= 3)
-#define NVCOMP_HAS_ZSTD_DECOMP 1
-#else
-#define NVCOMP_HAS_ZSTD_DECOMP 0
-#endif
+#define NVCOMP_HAS_ZSTD_DECOMP(MAJOR, MINOR, PATCH) (MAJOR > 2 or (MAJOR == 2 and MINOR >= 3))
 
-#if NVCOMP_MAJOR_VERSION > 2 or (NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION >= 4)
-#define NVCOMP_HAS_ZSTD_COMP 1
-#else
-#define NVCOMP_HAS_ZSTD_COMP 0
-#endif
+#define NVCOMP_HAS_ZSTD_COMP(MAJOR, MINOR, PATCH) (MAJOR > 2 or (MAJOR == 2 and MINOR >= 4))
 
-#if NVCOMP_MAJOR_VERSION > 2 or (NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION >= 3)
-#define NVCOMP_HAS_DEFLATE 1
-#else
-#define NVCOMP_HAS_DEFLATE 0
-#endif
+#define NVCOMP_HAS_DEFLATE(MAJOR, MINOR, PATCH) (MAJOR > 2 or (MAJOR == 2 and MINOR >= 5))
 
-#if NVCOMP_MAJOR_VERSION > 2 or (NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION > 3) or \
-  (NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION == 3 and NVCOMP_PATCH_VERSION >= 1)
-#define NVCOMP_HAS_TEMPSIZE_EX 1
-#else
-#define NVCOMP_HAS_TEMPSIZE_EX 0
-#endif
+#define NVCOMP_HAS_TEMPSIZE_EX(MAJOR, MINOR, PATCH) \
+  (MAJOR > 2 or (MAJOR == 2 and MINOR > 3) or (MAJOR == 2 and MINOR == 3 and PATCH >= 1))
 
 // ZSTD is stable for nvcomp 2.3.2 or newer
-#if NVCOMP_MAJOR_VERSION > 2 or (NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION > 3) or \
-  (NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION == 3 and NVCOMP_PATCH_VERSION >= 2)
-#define NVCOMP_ZSTD_IS_STABLE 1
-#else
-#define NVCOMP_ZSTD_IS_STABLE 0
-#endif
+#define NVCOMP_ZSTD_DECOMP_IS_STABLE(MAJOR, MINOR, PATCH) \
+  (MAJOR > 2 or (MAJOR == 2 and MINOR > 3) or (MAJOR == 2 and MINOR == 3 and PATCH >= 2))
 
 // Issue https://github.com/NVIDIA/spark-rapids/issues/6614 impacts nvCOMP 2.4.0 ZSTD decompression
 // on compute 6.x
-#if NVCOMP_MAJOR_VERSION == 2 and NVCOMP_MINOR_VERSION == 4 and NVCOMP_PATCH_VERSION == 0
-#define NVCOMP_ZSTD_IS_DISABLED_ON_PASCAL 1
-#else
-#define NVCOMP_ZSTD_IS_DISABLED_ON_PASCAL 0
-#endif
+#define NVCOMP_ZSTD_IS_DISABLED_ON_PASCAL(MAJOR, MINOR, PATCH) \
+  (MAJOR == 2 and MINOR == 4 and PATCH == 0)
 
 namespace cudf::io::nvcomp {
 
@@ -79,12 +56,12 @@ template <typename... Args>
 std::optional<nvcompStatus_t> batched_decompress_get_temp_size_ex(compression_type compression,
                                                                   Args&&... args)
 {
-#if NVCOMP_HAS_TEMPSIZE_EX
+#if NVCOMP_HAS_TEMPSIZE_EX(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION)
   switch (compression) {
     case compression_type::SNAPPY:
       return nvcompBatchedSnappyDecompressGetTempSizeEx(std::forward<Args>(args)...);
     case compression_type::ZSTD:
-#if NVCOMP_HAS_ZSTD_DECOMP
+#if NVCOMP_HAS_ZSTD_DECOMP(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION)
       return nvcompBatchedZstdDecompressGetTempSizeEx(std::forward<Args>(args)...);
 #else
       return std::nullopt;
@@ -104,16 +81,18 @@ auto batched_decompress_get_temp_size(compression_type compression, Args&&... ar
     case compression_type::SNAPPY:
       return nvcompBatchedSnappyDecompressGetTempSize(std::forward<Args>(args)...);
     case compression_type::ZSTD:
-#if NVCOMP_HAS_ZSTD_DECOMP
+#if NVCOMP_HAS_ZSTD_DECOMP(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION)
       return nvcompBatchedZstdDecompressGetTempSize(std::forward<Args>(args)...);
 #else
-      CUDF_FAIL("Unsupported compression type");
+      CUDF_FAIL("Decompression error: " +
+                nvcomp::is_decompression_disabled(nvcomp::compression_type::ZSTD).value());
 #endif
     case compression_type::DEFLATE:
-#if NVCOMP_HAS_DEFLATE
+#if NVCOMP_HAS_DEFLATE(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION)
       return nvcompBatchedDeflateDecompressGetTempSize(std::forward<Args>(args)...);
 #else
-      CUDF_FAIL("Unsupported compression type");
+      CUDF_FAIL("Decompression error: " +
+                nvcomp::is_decompression_disabled(nvcomp::compression_type::DEFLATE).value());
 #endif
     default: CUDF_FAIL("Unsupported compression type");
   }
@@ -127,16 +106,18 @@ auto batched_decompress_async(compression_type compression, Args&&... args)
     case compression_type::SNAPPY:
       return nvcompBatchedSnappyDecompressAsync(std::forward<Args>(args)...);
     case compression_type::ZSTD:
-#if NVCOMP_HAS_ZSTD_DECOMP
+#if NVCOMP_HAS_ZSTD_DECOMP(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION)
       return nvcompBatchedZstdDecompressAsync(std::forward<Args>(args)...);
 #else
-      CUDF_FAIL("Unsupported compression type");
+      CUDF_FAIL("Decompression error: " +
+                nvcomp::is_decompression_disabled(nvcomp::compression_type::ZSTD).value());
 #endif
     case compression_type::DEFLATE:
-#if NVCOMP_HAS_DEFLATE
+#if NVCOMP_HAS_DEFLATE(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION)
       return nvcompBatchedDeflateDecompressAsync(std::forward<Args>(args)...);
 #else
-      CUDF_FAIL("Unsupported compression type");
+      CUDF_FAIL("Decompression error: " +
+                nvcomp::is_decompression_disabled(nvcomp::compression_type::DEFLATE).value());
 #endif
     default: CUDF_FAIL("Unsupported compression type");
   }
@@ -163,22 +144,6 @@ size_t batched_decompress_temp_size(compression_type compression,
   return temp_size;
 }
 
-void check_is_zstd_enabled()
-{
-  CUDF_EXPECTS(NVCOMP_HAS_ZSTD_DECOMP, "nvCOMP 2.3 or newer is required for Zstandard compression");
-  CUDF_EXPECTS(NVCOMP_ZSTD_IS_STABLE or cudf::io::detail::nvcomp_integration::is_all_enabled(),
-               "Zstandard compression is experimental, you can enable it through "
-               "`LIBCUDF_NVCOMP_POLICY` environment variable.");
-
-#if NVCOMP_ZSTD_IS_DISABLED_ON_PASCAL
-  int device;
-  int cc_major;
-  CUDF_CUDA_TRY(cudaGetDevice(&device));
-  CUDF_CUDA_TRY(cudaDeviceGetAttribute(&cc_major, cudaDevAttrComputeCapabilityMajor, device));
-  CUDF_EXPECTS(cc_major != 6, "Zstandard decompression is disabled on Pascal GPUs");
-#endif
-}
-
 void batched_decompress(compression_type compression,
                         device_span<device_span<uint8_t const> const> inputs,
                         device_span<device_span<uint8_t> const> outputs,
@@ -187,8 +152,6 @@ void batched_decompress(compression_type compression,
                         size_t max_total_uncomp_size,
                         rmm::cuda_stream_view stream)
 {
-  if (compression == compression_type::ZSTD) { check_is_zstd_enabled(); }
-
   auto const num_chunks = inputs.size();
 
   // cuDF inflate inputs converted to nvcomp inputs
@@ -228,20 +191,22 @@ auto batched_compress_temp_size(compression_type compression,
         batch_size, max_uncompressed_chunk_bytes, nvcompBatchedSnappyDefaultOpts, &temp_size);
       break;
     case compression_type::DEFLATE:
-#if NVCOMP_HAS_DEFLATE
+#if NVCOMP_HAS_DEFLATE(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION)
       nvcomp_status = nvcompBatchedDeflateCompressGetTempSize(
         batch_size, max_uncompressed_chunk_bytes, nvcompBatchedDeflateDefaultOpts, &temp_size);
       break;
 #else
-      CUDF_FAIL("Unsupported compression type");
+      CUDF_FAIL("Compression error: " +
+                nvcomp::is_compression_disabled(nvcomp::compression_type::DEFLATE).value());
 #endif
     case compression_type::ZSTD:
-#if NVCOMP_HAS_ZSTD_COMP
+#if NVCOMP_HAS_ZSTD_COMP(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION)
       nvcomp_status = nvcompBatchedZstdCompressGetTempSize(
         batch_size, max_uncompressed_chunk_bytes, nvcompBatchedZstdDefaultOpts, &temp_size);
       break;
 #else
-      CUDF_FAIL("Unsupported compression type");
+      CUDF_FAIL("Compression error: " +
+                nvcomp::is_compression_disabled(nvcomp::compression_type::ZSTD).value());
 #endif
     default: CUDF_FAIL("Unsupported compression type");
   }
@@ -266,20 +231,22 @@ size_t compress_max_output_chunk_size(compression_type compression,
         capped_uncomp_bytes, nvcompBatchedSnappyDefaultOpts, &max_comp_chunk_size);
       break;
     case compression_type::DEFLATE:
-#if NVCOMP_HAS_DEFLATE
+#if NVCOMP_HAS_DEFLATE(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION)
       status = nvcompBatchedDeflateCompressGetMaxOutputChunkSize(
         capped_uncomp_bytes, nvcompBatchedDeflateDefaultOpts, &max_comp_chunk_size);
       break;
 #else
-      CUDF_FAIL("Unsupported compression type");
+      CUDF_FAIL("Compression error: " +
+                nvcomp::is_compression_disabled(nvcomp::compression_type::DEFLATE).value());
 #endif
     case compression_type::ZSTD:
-#if NVCOMP_HAS_ZSTD_COMP
+#if NVCOMP_HAS_ZSTD_COMP(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION)
       status = nvcompBatchedZstdCompressGetMaxOutputChunkSize(
         capped_uncomp_bytes, nvcompBatchedZstdDefaultOpts, &max_comp_chunk_size);
       break;
 #else
-      CUDF_FAIL("Unsupported compression type");
+      CUDF_FAIL("Compression error: " +
+                nvcomp::is_compression_disabled(nvcomp::compression_type::ZSTD).value());
 #endif
     default: CUDF_FAIL("Unsupported compression type");
   }
@@ -316,7 +283,7 @@ static void batched_compress_async(compression_type compression,
                                                        stream.value());
       break;
     case compression_type::DEFLATE:
-#if NVCOMP_HAS_DEFLATE
+#if NVCOMP_HAS_DEFLATE(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION)
       nvcomp_status = nvcompBatchedDeflateCompressAsync(device_uncompressed_ptrs,
                                                         device_uncompressed_bytes,
                                                         max_uncompressed_chunk_bytes,
@@ -329,10 +296,11 @@ static void batched_compress_async(compression_type compression,
                                                         stream.value());
       break;
 #else
-      CUDF_FAIL("Unsupported compression type");
+      CUDF_FAIL("Compression error: " +
+                nvcomp::is_compression_disabled(nvcomp::compression_type::DEFLATE).value());
 #endif
     case compression_type::ZSTD:
-#if NVCOMP_HAS_ZSTD_COMP
+#if NVCOMP_HAS_ZSTD_COMP(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION)
       nvcomp_status = nvcompBatchedZstdCompressAsync(device_uncompressed_ptrs,
                                                      device_uncompressed_bytes,
                                                      max_uncompressed_chunk_bytes,
@@ -345,7 +313,8 @@ static void batched_compress_async(compression_type compression,
                                                      stream.value());
       break;
 #else
-      CUDF_FAIL("Unsupported compression type");
+      CUDF_FAIL("Compression error: " +
+                nvcomp::is_compression_disabled(nvcomp::compression_type::ZSTD).value());
 #endif
     default: CUDF_FAIL("Unsupported compression type");
   }
@@ -390,18 +359,109 @@ void batched_compress(compression_type compression,
   update_compression_results(actual_compressed_data_sizes, results, stream);
 }
 
-bool is_compression_enabled(compression_type compression)
+feature_status_parameters::feature_status_parameters()
+  : lib_major_version{NVCOMP_MAJOR_VERSION},
+    lib_minor_version{NVCOMP_MINOR_VERSION},
+    lib_patch_version{NVCOMP_PATCH_VERSION},
+    are_all_integrations_enabled{detail::nvcomp_integration::is_all_enabled()},
+    are_stable_integrations_enabled{detail::nvcomp_integration::is_stable_enabled()}
+{
+  int device;
+  CUDF_CUDA_TRY(cudaGetDevice(&device));
+  CUDF_CUDA_TRY(
+    cudaDeviceGetAttribute(&compute_capability_major, cudaDevAttrComputeCapabilityMajor, device));
+}
+
+std::optional<std::string> is_compression_disabled(compression_type compression,
+                                                   feature_status_parameters params)
 {
   switch (compression) {
-    case compression_type::DEFLATE:
-      // See https://github.com/rapidsai/cudf/issues/11812
-      return false;
-    case compression_type::SNAPPY: return detail::nvcomp_integration::is_stable_enabled();
-    case compression_type::ZSTD:
-      return NVCOMP_HAS_ZSTD_COMP and detail::nvcomp_integration::is_stable_enabled();
-    default: return false;
+    case compression_type::DEFLATE: {
+      if (not NVCOMP_HAS_DEFLATE(
+            params.lib_major_version, params.lib_minor_version, params.lib_patch_version)) {
+        return "nvCOMP 2.5 or newer is required for Deflate compression";
+      }
+      if (not params.are_all_integrations_enabled) {
+        return "DEFLATE compression is experimental, you can enable it through "
+               "`LIBCUDF_NVCOMP_POLICY` environment variable.";
+      }
+      return std::nullopt;
+    }
+    case compression_type::SNAPPY: {
+      if (not params.are_stable_integrations_enabled) {
+        return "Snappy compression has been disabled through the `LIBCUDF_NVCOMP_POLICY` "
+               "environment variable.";
+      }
+      return std::nullopt;
+    }
+    case compression_type::ZSTD: {
+      if (not NVCOMP_HAS_ZSTD_COMP(
+            params.lib_major_version, params.lib_minor_version, params.lib_patch_version)) {
+        return "nvCOMP 2.4 or newer is required for Zstandard compression";
+      }
+      if (not params.are_stable_integrations_enabled) {
+        return "Zstandard compression is experimental, you can enable it through "
+               "`LIBCUDF_NVCOMP_POLICY` environment variable.";
+      }
+      return std::nullopt;
+    }
+    default: return "Unsupported compression type";
+  }
+  return "Unsupported compression type";
+}
+
+std::optional<std::string> is_zstd_decomp_disabled(feature_status_parameters const& params)
+{
+  if (not NVCOMP_HAS_ZSTD_DECOMP(
+        params.lib_major_version, params.lib_minor_version, params.lib_patch_version)) {
+    return "nvCOMP 2.3 or newer is required for Zstandard decompression";
+  }
+
+  if (NVCOMP_ZSTD_DECOMP_IS_STABLE(
+        params.lib_major_version, params.lib_minor_version, params.lib_patch_version)) {
+    if (not params.are_stable_integrations_enabled) {
+      return "Zstandard decompression has been disabled through the `LIBCUDF_NVCOMP_POLICY` "
+             "environment variable.";
+    }
+  } else if (not params.are_all_integrations_enabled) {
+    return "Zstandard decompression is experimental, you can enable it through "
+           "`LIBCUDF_NVCOMP_POLICY` environment variable.";
+  }
+
+  if (NVCOMP_ZSTD_IS_DISABLED_ON_PASCAL(
+        params.lib_major_version, params.lib_minor_version, params.lib_patch_version) and
+      params.compute_capability_major == 6) {
+    return "Zstandard decompression is disabled on Pascal GPUs";
+  }
+  return std::nullopt;
+}
+
+std::optional<std::string> is_decompression_disabled(compression_type compression,
+                                                     feature_status_parameters params)
+{
+  switch (compression) {
+    case compression_type::DEFLATE: {
+      if (not NVCOMP_HAS_DEFLATE(
+            params.lib_major_version, params.lib_minor_version, params.lib_patch_version)) {
+        return "nvCOMP 2.5 or newer is required for Deflate decompression";
+      }
+      if (not params.are_all_integrations_enabled) {
+        return "DEFLATE decompression is experimental, you can enable it through "
+               "`LIBCUDF_NVCOMP_POLICY` environment variable.";
+      }
+      return std::nullopt;
+    }
+    case compression_type::SNAPPY: {
+      if (not params.are_stable_integrations_enabled) {
+        return "Snappy decompression has been disabled through the `LIBCUDF_NVCOMP_POLICY` "
+               "environment variable.";
+      }
+      return std::nullopt;
+    }
+    case compression_type::ZSTD: return is_zstd_decomp_disabled(params);
+    default: return "Unsupported compression type";
   }
-  return false;
+  return "Unsupported compression type";
 }
 
 size_t compress_input_alignment_bits(compression_type compression)
@@ -430,10 +490,11 @@ std::optional<size_t> compress_max_allowed_chunk_size(compression_type compressi
     case compression_type::DEFLATE: return 64 * 1024;
     case compression_type::SNAPPY: return std::nullopt;
     case compression_type::ZSTD:
-#if NVCOMP_HAS_ZSTD_COMP
+#if NVCOMP_HAS_ZSTD_COMP(NVCOMP_MAJOR_VERSION, NVCOMP_MINOR_VERSION, NVCOMP_PATCH_VERSION)
       return nvcompZstdCompressionMaxAllowedChunkSize;
 #else
-      CUDF_FAIL("Unsupported compression type");
+      CUDF_FAIL("Compression error: " +
+                nvcomp::is_compression_disabled(nvcomp::compression_type::ZSTD).value());
 #endif
     default: return std::nullopt;
   }
diff --git a/cpp/src/io/comp/nvcomp_adapter.hpp b/cpp/src/io/comp/nvcomp_adapter.hpp
index a13cb031163..a6bde7957c7 100644
--- a/cpp/src/io/comp/nvcomp_adapter.hpp
+++ b/cpp/src/io/comp/nvcomp_adapter.hpp
@@ -18,6 +18,8 @@
 
 #include "gpuinflate.hpp"
 
+#include <io/utilities/config_utils.hpp>
+
 #include <cudf/utilities/error.hpp>
 #include <cudf/utilities/span.hpp>
 
@@ -30,14 +32,52 @@ namespace cudf::io::nvcomp {
 enum class compression_type { SNAPPY, ZSTD, DEFLATE };
 
 /**
- * @brief Whether the given compression type is enabled through nvCOMP.
+ * @brief Set of parameters that impact whether the use of nvCOMP features is enabled.
+ */
+struct feature_status_parameters {
+  int lib_major_version;
+  int lib_minor_version;
+  int lib_patch_version;
+  bool are_all_integrations_enabled;
+  bool are_stable_integrations_enabled;
+  int compute_capability_major;
+
+  feature_status_parameters();
+  feature_status_parameters(
+    int major, int minor, int patch, bool all_enabled, bool stable_enabled, int cc_major)
+    : lib_major_version{major},
+      lib_minor_version{minor},
+      lib_patch_version{patch},
+      are_all_integrations_enabled{all_enabled},
+      are_stable_integrations_enabled{stable_enabled},
+      compute_capability_major{cc_major}
+  {
+  }
+};
+
+/**
+ * @brief If a compression type is disabled through nvCOMP, returns the reason as a string.
+ *
+ * Result can depend on nvCOMP version and environment variables.
+ *
+ * @param compression Compression type
+ * @param params Optional parameters to query status with different configurations
+ * @returns Reason for the feature disablement, `std::nullopt` if the feature is enabled
+ */
+[[nodiscard]] std::optional<std::string> is_compression_disabled(
+  compression_type compression, feature_status_parameters params = feature_status_parameters());
+
+/**
+ * @brief If a decompression type is disabled through nvCOMP, returns the reason as a string.
  *
- * Result depends on nvCOMP version and environment variables.
+ * Result can depend on nvCOMP version and environment variables.
  *
  * @param compression Compression type
- * @returns true if nvCOMP use is enabled; false otherwise
+ * @param params Optional parameters to query status with different configurations
+ * @returns Reason for the feature disablement, `std::nullopt` if the feature is enabled
  */
-[[nodiscard]] bool is_compression_enabled(compression_type compression);
+[[nodiscard]] std::optional<std::string> is_decompression_disabled(
+  compression_type compression, feature_status_parameters params = feature_status_parameters());
 
 /**
  * @brief Device batch decompression of given type.
diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu
index 7fb83b2a24e..7a135c1f2f2 100644
--- a/cpp/src/io/orc/reader_impl.cu
+++ b/cpp/src/io/orc/reader_impl.cu
@@ -379,8 +379,10 @@ rmm::device_buffer reader::impl::decompress_stripe_data(
     device_span<device_span<uint8_t>> inflate_out_view{inflate_out.data(), num_compressed_blocks};
     switch (decompressor.compression()) {
       case compression_type::ZLIB:
-        // See https://github.com/rapidsai/cudf/issues/11812
-        if (false) {
+        if (nvcomp::is_decompression_disabled(nvcomp::compression_type::DEFLATE)) {
+          gpuinflate(
+            inflate_in_view, inflate_out_view, inflate_res, gzip_header_included::NO, stream);
+        } else {
           nvcomp::batched_decompress(nvcomp::compression_type::DEFLATE,
                                      inflate_in_view,
                                      inflate_out_view,
@@ -388,13 +390,12 @@ rmm::device_buffer reader::impl::decompress_stripe_data(
                                      max_uncomp_block_size,
                                      total_decomp_size,
                                      stream);
-        } else {
-          gpuinflate(
-            inflate_in_view, inflate_out_view, inflate_res, gzip_header_included::NO, stream);
         }
         break;
       case compression_type::SNAPPY:
-        if (nvcomp_integration::is_stable_enabled()) {
+        if (nvcomp::is_decompression_disabled(nvcomp::compression_type::SNAPPY)) {
+          gpu_unsnap(inflate_in_view, inflate_out_view, inflate_res, stream);
+        } else {
           nvcomp::batched_decompress(nvcomp::compression_type::SNAPPY,
                                      inflate_in_view,
                                      inflate_out_view,
@@ -402,11 +403,13 @@ rmm::device_buffer reader::impl::decompress_stripe_data(
                                      max_uncomp_block_size,
                                      total_decomp_size,
                                      stream);
-        } else {
-          gpu_unsnap(inflate_in_view, inflate_out_view, inflate_res, stream);
         }
         break;
       case compression_type::ZSTD:
+        if (auto const reason = nvcomp::is_decompression_disabled(nvcomp::compression_type::ZSTD);
+            reason) {
+          CUDF_FAIL("Decompression error: " + reason.value());
+        }
         nvcomp::batched_decompress(nvcomp::compression_type::ZSTD,
                                    inflate_in_view,
                                    inflate_out_view,
@@ -522,8 +525,8 @@ void update_null_mask(cudf::detail::hostdevice_2dvector<gpu::ColumnDesc>& chunks
           parent_mask_len, mask_state::ALL_NULL, rmm::cuda_stream_view(stream), mr);
         auto merged_mask      = static_cast<bitmask_type*>(merged_null_mask.data());
         uint32_t* dst_idx_ptr = dst_idx.data();
-        // Copy child valid bits from child column to valid indexes, this will merge both child and
-        // parent null masks
+        // Copy child valid bits from child column to valid indexes, this will merge both child
+        // and parent null masks
         thrust::for_each(rmm::exec_policy(stream),
                          thrust::make_counting_iterator(0),
                          thrust::make_counting_iterator(0) + dst_idx.size(),
diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu
index 109030ef160..013761343d3 100644
--- a/cpp/src/io/orc/stripe_enc.cu
+++ b/cpp/src/io/orc/stripe_enc.cu
@@ -1332,11 +1332,11 @@ void CompressOrcDataStreams(uint8_t* compressed_data,
 
   if (compression == SNAPPY) {
     try {
-      if (nvcomp::is_compression_enabled(nvcomp::compression_type::SNAPPY)) {
+      if (nvcomp::is_compression_disabled(nvcomp::compression_type::SNAPPY)) {
+        gpu_snap(comp_in, comp_out, comp_res, stream);
+      } else {
         nvcomp::batched_compress(
           nvcomp::compression_type::SNAPPY, comp_in, comp_out, comp_res, stream);
-      } else {
-        gpu_snap(comp_in, comp_out, comp_res, stream);
       }
     } catch (...) {
       // There was an error in compressing so set an error status for each block
@@ -1348,12 +1348,18 @@ void CompressOrcDataStreams(uint8_t* compressed_data,
       // Since SNAPPY is the default compression (may not be explicitly requested), fall back to
       // writing without compression
     }
-  } else if (compression == ZLIB and
-             nvcomp::is_compression_enabled(nvcomp::compression_type::DEFLATE)) {
+  } else if (compression == ZLIB) {
+    if (auto const reason = nvcomp::is_compression_disabled(nvcomp::compression_type::DEFLATE);
+        reason) {
+      CUDF_FAIL("Compression error: " + reason.value());
+    }
     nvcomp::batched_compress(
       nvcomp::compression_type::DEFLATE, comp_in, comp_out, comp_res, stream);
-  } else if (compression == ZSTD and
-             nvcomp::is_compression_enabled(nvcomp::compression_type::ZSTD)) {
+  } else if (compression == ZSTD) {
+    if (auto const reason = nvcomp::is_compression_disabled(nvcomp::compression_type::ZSTD);
+        reason) {
+      CUDF_FAIL("Compression error: " + reason.value());
+    }
     nvcomp::batched_compress(nvcomp::compression_type::ZSTD, comp_in, comp_out, comp_res, stream);
   } else if (compression != NONE) {
     CUDF_FAIL("Unsupported compression type");
diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu
index a5e9e9da4cb..c0ae58a64d9 100644
--- a/cpp/src/io/orc/writer_impl.cu
+++ b/cpp/src/io/orc/writer_impl.cu
@@ -118,9 +118,9 @@ constexpr size_t compression_block_size(orc::CompressionKind compression)
   if (compression == orc::CompressionKind::NONE) { return 0; }
 
   auto const ncomp_type   = to_nvcomp_compression_type(compression);
-  auto const nvcomp_limit = nvcomp::is_compression_enabled(ncomp_type)
-                              ? nvcomp::compress_max_allowed_chunk_size(ncomp_type)
-                              : std::nullopt;
+  auto const nvcomp_limit = nvcomp::is_compression_disabled(ncomp_type)
+                              ? std::nullopt
+                              : nvcomp::compress_max_allowed_chunk_size(ncomp_type);
 
   constexpr size_t max_block_size = 256 * 1024;
   return std::min(nvcomp_limit.value_or(max_block_size), max_block_size);
@@ -537,7 +537,7 @@ constexpr size_t RLE_stream_size(TypeKind kind, size_t count)
 auto uncomp_block_alignment(CompressionKind compression_kind)
 {
   if (compression_kind == NONE or
-      not nvcomp::is_compression_enabled(to_nvcomp_compression_type(compression_kind))) {
+      nvcomp::is_compression_disabled(to_nvcomp_compression_type(compression_kind))) {
     return 1u;
   }
 
@@ -547,7 +547,7 @@ auto uncomp_block_alignment(CompressionKind compression_kind)
 auto comp_block_alignment(CompressionKind compression_kind)
 {
   if (compression_kind == NONE or
-      not nvcomp::is_compression_enabled(to_nvcomp_compression_type(compression_kind))) {
+      nvcomp::is_compression_disabled(to_nvcomp_compression_type(compression_kind))) {
     return 1u;
   }
 
@@ -2161,7 +2161,8 @@ void writer::impl::write(table_view const& table)
 
   auto dec_chunk_sizes = decimal_chunk_sizes(orc_table, segmentation, stream);
 
-  auto const uncomp_block_align = uncomp_block_alignment(compression_kind_);
+  auto const uncompressed_block_align = uncomp_block_alignment(compression_kind_);
+  auto const compressed_block_align   = comp_block_alignment(compression_kind_);
   auto streams =
     create_streams(orc_table.columns, segmentation, decimal_column_sizes(dec_chunk_sizes.rg_sizes));
   auto enc_data = encode_columns(orc_table,
@@ -2169,7 +2170,7 @@ void writer::impl::write(table_view const& table)
                                  std::move(dec_chunk_sizes),
                                  segmentation,
                                  streams,
-                                 uncomp_block_align,
+                                 uncompressed_block_align,
                                  stream);
 
   // Assemble individual disparate column chunks into contiguous data streams
@@ -2187,9 +2188,9 @@ void writer::impl::write(table_view const& table)
     auto const max_compressed_block_size =
       max_compression_output_size(compression_kind_, compression_blocksize_);
     auto const padded_max_compressed_block_size =
-      util::round_up_unsafe<size_t>(max_compressed_block_size, uncomp_block_align);
+      util::round_up_unsafe<size_t>(max_compressed_block_size, compressed_block_align);
     auto const padded_block_header_size =
-      util::round_up_unsafe<size_t>(block_header_size, uncomp_block_align);
+      util::round_up_unsafe<size_t>(block_header_size, compressed_block_align);
 
     auto stream_output = [&]() {
       size_t max_stream_size = 0;
@@ -2238,7 +2239,7 @@ void writer::impl::write(table_view const& table)
                                   compression_kind_,
                                   compression_blocksize_,
                                   max_compressed_block_size,
-                                  comp_block_alignment(compression_kind_),
+                                  compressed_block_align,
                                   strm_descs,
                                   enc_data.streams,
                                   comp_results,
diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu
index 090c275fcbc..26b3f97616f 100644
--- a/cpp/src/io/parquet/writer_impl.cu
+++ b/cpp/src/io/parquet/writer_impl.cu
@@ -927,7 +927,7 @@ auto to_nvcomp_compression_type(Compression codec)
 auto page_alignment(Compression codec)
 {
   if (codec == Compression::UNCOMPRESSED or
-      not nvcomp::is_compression_enabled(to_nvcomp_compression_type(codec))) {
+      nvcomp::is_compression_disabled(to_nvcomp_compression_type(codec))) {
     return 1u;
   }
 
@@ -1172,19 +1172,22 @@ void writer::impl::encode_pages(hostdevice_2dvector<gpu::EncColumnChunk>& chunks
   gpu::EncodePages(batch_pages, comp_in, comp_out, comp_res, stream);
   switch (compression_) {
     case parquet::Compression::SNAPPY:
-      if (nvcomp::is_compression_enabled(nvcomp::compression_type::SNAPPY)) {
+      if (nvcomp::is_compression_disabled(nvcomp::compression_type::SNAPPY)) {
+        gpu_snap(comp_in, comp_out, comp_res, stream);
+      } else {
         nvcomp::batched_compress(
           nvcomp::compression_type::SNAPPY, comp_in, comp_out, comp_res, stream);
-      } else {
-        gpu_snap(comp_in, comp_out, comp_res, stream);
       }
       break;
-    case parquet::Compression::ZSTD:
-      if (nvcomp::is_compression_enabled(nvcomp::compression_type::ZSTD)) {
-        nvcomp::batched_compress(
-          nvcomp::compression_type::ZSTD, comp_in, comp_out, comp_res, stream);
+    case parquet::Compression::ZSTD: {
+      if (auto const reason = nvcomp::is_compression_disabled(nvcomp::compression_type::ZSTD);
+          reason) {
+        CUDF_FAIL("Compression error: " + reason.value());
       }
+      nvcomp::batched_compress(nvcomp::compression_type::ZSTD, comp_in, comp_out, comp_res, stream);
+
       break;
+    }
     case parquet::Compression::UNCOMPRESSED: break;
     default: CUDF_FAIL("invalid compression type");
   }
@@ -1246,9 +1249,9 @@ size_t max_page_bytes(Compression compression, size_t max_page_size_bytes)
   if (compression == parquet::Compression::UNCOMPRESSED) { return max_page_size_bytes; }
 
   auto const ncomp_type   = to_nvcomp_compression_type(compression);
-  auto const nvcomp_limit = nvcomp::is_compression_enabled(ncomp_type)
-                              ? nvcomp::compress_max_allowed_chunk_size(ncomp_type)
-                              : std::nullopt;
+  auto const nvcomp_limit = nvcomp::is_compression_disabled(ncomp_type)
+                              ? std::nullopt
+                              : nvcomp::compress_max_allowed_chunk_size(ncomp_type);
 
   return std::min(nvcomp_limit.value_or(max_page_size_bytes), max_page_size_bytes);
 }
diff --git a/cpp/src/io/text/bgzip_data_chunk_source.cu b/cpp/src/io/text/bgzip_data_chunk_source.cu
index 22955deeabb..3fa68cd8b0f 100644
--- a/cpp/src/io/text/bgzip_data_chunk_source.cu
+++ b/cpp/src/io/text/bgzip_data_chunk_source.cu
@@ -144,7 +144,13 @@ class bgzip_data_chunk_reader : public data_chunk_reader {
         bgzip_nvcomp_transform_functor{reinterpret_cast<uint8_t const*>(d_compressed_blocks.data()),
                                        reinterpret_cast<uint8_t*>(d_decompressed_blocks.begin())});
       if (decompressed_size() > 0) {
-        if (cudf::io::detail::nvcomp_integration::is_all_enabled()) {
+        if (nvcomp::is_decompression_disabled(nvcomp::compression_type::DEFLATE)) {
+          gpuinflate(d_compressed_spans,
+                     d_decompressed_spans,
+                     d_decompression_results,
+                     gzip_header_included::NO,
+                     stream);
+        } else {
           cudf::io::nvcomp::batched_decompress(cudf::io::nvcomp::compression_type::DEFLATE,
                                                d_compressed_spans,
                                                d_decompressed_spans,
@@ -152,12 +158,6 @@ class bgzip_data_chunk_reader : public data_chunk_reader {
                                                max_decompressed_size,
                                                decompressed_size(),
                                                stream);
-        } else {
-          gpuinflate(d_compressed_spans,
-                     d_decompressed_spans,
-                     d_decompression_results,
-                     gzip_header_included::NO,
-                     stream);
         }
       }
       is_decompressed = true;
diff --git a/cpp/tests/io/comp/decomp_test.cpp b/cpp/tests/io/comp/decomp_test.cpp
index 51dfc467e00..a97f44bce43 100644
--- a/cpp/tests/io/comp/decomp_test.cpp
+++ b/cpp/tests/io/comp/decomp_test.cpp
@@ -16,6 +16,7 @@
 
 #include <io/comp/gpuinflate.hpp>
 #include <io/utilities/hostdevice_vector.hpp>
+#include <src/io/comp/nvcomp_adapter.hpp>
 
 #include <cudf/utilities/default_stream.hpp>
 
@@ -118,6 +119,9 @@ struct BrotliDecompressTest : public DecompressTest<BrotliDecompressTest> {
   }
 };
 
+struct NvcompConfigTest : public cudf::test::BaseFixture {
+};
+
 TEST_F(GzipDecompressTest, HelloWorld)
 {
   constexpr char uncompressed[]  = "hello world";
@@ -166,4 +170,58 @@ TEST_F(BrotliDecompressTest, HelloWorld)
   EXPECT_EQ(output, input);
 }
 
+TEST_F(NvcompConfigTest, Compression)
+{
+  using cudf::io::nvcomp::compression_type;
+  auto const& comp_disabled = cudf::io::nvcomp::is_compression_disabled;
+
+  EXPECT_FALSE(comp_disabled(compression_type::DEFLATE, {2, 5, 0, true, true, 0}));
+  // version 2.5 required
+  EXPECT_TRUE(comp_disabled(compression_type::DEFLATE, {2, 4, 0, true, true, 0}));
+  // all integrations enabled required
+  EXPECT_TRUE(comp_disabled(compression_type::DEFLATE, {2, 5, 0, false, true, 0}));
+
+  EXPECT_FALSE(comp_disabled(compression_type::ZSTD, {2, 4, 0, true, true, 0}));
+  EXPECT_FALSE(comp_disabled(compression_type::ZSTD, {2, 4, 0, false, true, 0}));
+  // 2.4 version required
+  EXPECT_TRUE(comp_disabled(compression_type::ZSTD, {2, 3, 1, false, true, 0}));
+  // stable integrations enabled required
+  EXPECT_TRUE(comp_disabled(compression_type::ZSTD, {2, 4, 0, false, false, 0}));
+
+  EXPECT_FALSE(comp_disabled(compression_type::SNAPPY, {2, 5, 0, true, true, 0}));
+  EXPECT_FALSE(comp_disabled(compression_type::SNAPPY, {2, 4, 0, false, true, 0}));
+  // stable integrations enabled required
+  EXPECT_TRUE(comp_disabled(compression_type::SNAPPY, {2, 3, 0, false, false, 0}));
+}
+
+TEST_F(NvcompConfigTest, Decompression)
+{
+  using cudf::io::nvcomp::compression_type;
+  auto const& decomp_disabled = cudf::io::nvcomp::is_decompression_disabled;
+
+  EXPECT_FALSE(decomp_disabled(compression_type::DEFLATE, {2, 5, 0, true, true, 7}));
+  // version 2.5 required
+  EXPECT_TRUE(decomp_disabled(compression_type::DEFLATE, {2, 4, 0, true, true, 7}));
+  // all integrations enabled required
+  EXPECT_TRUE(decomp_disabled(compression_type::DEFLATE, {2, 5, 0, false, true, 7}));
+
+  EXPECT_FALSE(decomp_disabled(compression_type::ZSTD, {2, 4, 0, true, true, 7}));
+  EXPECT_FALSE(decomp_disabled(compression_type::ZSTD, {2, 3, 2, false, true, 6}));
+  EXPECT_FALSE(decomp_disabled(compression_type::ZSTD, {2, 3, 0, true, true, 6}));
+  // 2.3.1 and earlier requires all integrations to be enabled
+  EXPECT_TRUE(decomp_disabled(compression_type::ZSTD, {2, 3, 1, false, true, 7}));
+  // 2.3 version required
+  EXPECT_TRUE(decomp_disabled(compression_type::ZSTD, {2, 2, 0, true, true, 7}));
+  // stable integrations enabled required
+  EXPECT_TRUE(decomp_disabled(compression_type::ZSTD, {2, 4, 0, false, false, 7}));
+  // 2.4.0 disabled on Pascal
+  EXPECT_TRUE(decomp_disabled(compression_type::ZSTD, {2, 4, 0, true, true, 6}));
+
+  EXPECT_FALSE(decomp_disabled(compression_type::SNAPPY, {2, 4, 0, true, true, 7}));
+  EXPECT_FALSE(decomp_disabled(compression_type::SNAPPY, {2, 3, 0, false, true, 7}));
+  EXPECT_FALSE(decomp_disabled(compression_type::SNAPPY, {2, 2, 0, false, true, 7}));
+  // stable integrations enabled required
+  EXPECT_TRUE(decomp_disabled(compression_type::SNAPPY, {2, 2, 0, false, false, 7}));
+}
+
 CUDF_TEST_PROGRAM_MAIN()
diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py
index d3eceeddc10..6a55fece6ff 100644
--- a/python/cudf/cudf/tests/test_parquet.py
+++ b/python/cudf/cudf/tests/test_parquet.py
@@ -2658,11 +2658,11 @@ def test_parquet_writer_zstd():
 
     buff = BytesIO()
     try:
-        expected.to_orc(buff, compression="ZSTD")
+        expected.to_parquet(buff, compression="ZSTD")
     except RuntimeError:
         pytest.mark.xfail(reason="Newer nvCOMP version is required")
     else:
-        got = pd.read_orc(buff)
+        got = pd.read_parquet(buff)
         assert_eq(expected, got)
 
 
From 866875238255c1450dad9cb67e156e0d009eb36c Mon Sep 17 00:00:00 2001
From: Alessandro Bellina <abellina@nvidia.com>
Date: Fri, 11 Nov 2022 12:37:20 -0600
Subject: [PATCH 159/202] Adds an EventHandler to Java MemoryBuffer to be
 invoked on close (#12125)

This PR adds an EventHandler to `MemoryBuffer` with a single method `onClosed`. This is invoked during the `close` call, but after the `refCount` has been updated.

I am also making `getRefCount` public in this PR. Spill code in the RAPIDS Accelerator for Spark could likely assert/require that refCount==1 when taking in a new buffer to be spillable. This last change is a nice to have.

Authors:
  - Alessandro Bellina (https://github.com/abellina)

Approvers:
  - Robert (Bobby) Evans (https://github.com/revans2)
  - Jim Brennan (https://github.com/jbrennan333)
  - Jason Lowe (https://github.com/jlowe)

URL: https://github.com/rapidsai/cudf/pull/12125
---
 .../java/ai/rapids/cudf/MemoryBuffer.java     | 51 +++++++++++++++++--
 .../java/ai/rapids/cudf/MemoryBufferTest.java | 49 +++++++++++++++++-
 2 files changed, 96 insertions(+), 4 deletions(-)

diff --git a/java/src/main/java/ai/rapids/cudf/MemoryBuffer.java b/java/src/main/java/ai/rapids/cudf/MemoryBuffer.java
index 9f0d9a451c0..e6b3994235d 100644
--- a/java/src/main/java/ai/rapids/cudf/MemoryBuffer.java
+++ b/java/src/main/java/ai/rapids/cudf/MemoryBuffer.java
@@ -1,6 +1,6 @@
 /*
  *
- *  Copyright (c) 2019-2020, NVIDIA CORPORATION.
+ *  Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  *  Licensed under the Apache License, Version 2.0 (the "License");
  *  you may not use this file except in compliance with the License.
@@ -28,6 +28,23 @@
  * subclassing beyond what is included in CUDF is not recommended and not supported.
  */
 abstract public class MemoryBuffer implements AutoCloseable {
+  /**
+   * Interface to handle events for this MemoryBuffer. Only invoked during
+   * close, hence `onClosed` is the only event.
+   */
+  public interface EventHandler {
+    /**
+     * `onClosed` is invoked with the updated `refCount` during `close`.
+     * The last invocation of `onClosed` will be with `refCount=0`.
+     *
+     * @note the callback is invoked with this `MemoryBuffer`'s lock held.
+     *
+     * @param refCount - the updated ref count for this MemoryBuffer at the time
+     *                 of invocation
+     */
+    void onClosed(int refCount);
+  }
+
   private static final Logger log = LoggerFactory.getLogger(MemoryBuffer.class);
   protected final long address;
   protected final long length;
@@ -36,6 +53,8 @@ abstract public class MemoryBuffer implements AutoCloseable {
   protected final MemoryBufferCleaner cleaner;
   protected final long id;
 
+  private EventHandler eventHandler;
+
   public static abstract class MemoryBufferCleaner extends MemoryCleaner.Cleaner{}
 
   private static final class SlicedBufferCleaner extends MemoryBufferCleaner {
@@ -193,6 +212,27 @@ public final void copyFromMemoryBufferAsync(
    */
   public abstract MemoryBuffer slice(long offset, long len);
 
+  /**
+   * Set an event handler for this buffer. This method can be invoked with null
+   * to unset the handler.
+   *
+   * @param newHandler - the EventHandler to use from this point forward
+   * @return the prior event handler, or null if not set.
+   */
+  public synchronized EventHandler setEventHandler(EventHandler newHandler) {
+    EventHandler prev = this.eventHandler;
+    this.eventHandler = newHandler;
+    return prev;
+  }
+
+  /**
+   * Returns the current event handler for this buffer or null if no handler
+   * is associated or this buffer is closed.
+   */
+  public synchronized EventHandler getEventHandler() {
+    return this.eventHandler;
+  }
+
   /**
    * Close this buffer and free memory
    */
@@ -200,6 +240,9 @@ public synchronized void close() {
     if (cleaner != null) {
       refCount--;
       cleaner.delRef();
+      if (eventHandler != null) {
+        eventHandler.onClosed(refCount);
+      }
       if (refCount == 0) {
         cleaner.clean(false);
         closed = true;
@@ -232,8 +275,10 @@ public synchronized void incRefCount() {
     cleaner.addRef();
   }
 
-  // visible for testing
-  synchronized int getRefCount() {
+  /**
+   * Get the current reference count for this buffer.
+   */
+  public synchronized int getRefCount() {
     return refCount;
   }
 }
diff --git a/java/src/test/java/ai/rapids/cudf/MemoryBufferTest.java b/java/src/test/java/ai/rapids/cudf/MemoryBufferTest.java
index df710c71f63..c332ce660d1 100644
--- a/java/src/test/java/ai/rapids/cudf/MemoryBufferTest.java
+++ b/java/src/test/java/ai/rapids/cudf/MemoryBufferTest.java
@@ -1,6 +1,6 @@
 /*
  *
- *  Copyright (c) 2021, NVIDIA CORPORATION.
+ *  Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  *  Licensed under the Apache License, Version 2.0 (the "License");
  *  you may not use this file except in compliance with the License.
@@ -20,6 +20,8 @@
 
 import org.junit.jupiter.api.Test;
 
+import java.util.concurrent.atomic.AtomicInteger;
+
 import static org.junit.jupiter.api.Assertions.*;
 
 public class MemoryBufferTest extends CudfTestBase {
@@ -168,4 +170,49 @@ private void verifyOutput(HostMemoryBuffer out) {
     out.getBytes(bytes, 0, 0, 16);
     assertArrayEquals(EXPECTED, bytes);
   }
+
+  @Test
+  public void testEventHandlerIsCalledForEachClose() {
+    final AtomicInteger onClosedWasCalled = new AtomicInteger(0);
+    try (DeviceMemoryBuffer b = DeviceMemoryBuffer.allocate(256)) {
+      b.setEventHandler(refCount -> onClosedWasCalled.incrementAndGet());
+    }
+    assertEquals(1, onClosedWasCalled.get());
+    onClosedWasCalled.set(0);
+
+    try (DeviceMemoryBuffer b = DeviceMemoryBuffer.allocate(256)) {
+      b.setEventHandler(refCount -> onClosedWasCalled.incrementAndGet());
+      DeviceMemoryBuffer sliced = b.slice(0, b.getLength());
+      sliced.close();
+    }
+    assertEquals(2, onClosedWasCalled.get());
+  }
+
+  @Test
+  public void testEventHandlerIsNotCalledIfNotSet() {
+    final AtomicInteger onClosedWasCalled = new AtomicInteger(0);
+    try (DeviceMemoryBuffer b = DeviceMemoryBuffer.allocate(256)) {
+      assertNull(b.getEventHandler());
+    }
+    assertEquals(0, onClosedWasCalled.get());
+    try (DeviceMemoryBuffer b = DeviceMemoryBuffer.allocate(256)) {
+      b.setEventHandler(refCount -> onClosedWasCalled.incrementAndGet());
+      b.setEventHandler(null);
+    }
+    assertEquals(0, onClosedWasCalled.get());
+  }
+
+  @Test
+  public void testEventHandlerReturnsPreviousHandlerOnReset() {
+    try (DeviceMemoryBuffer b = DeviceMemoryBuffer.allocate(256)) {
+      MemoryBuffer.EventHandler handler = refCount -> {};
+      MemoryBuffer.EventHandler handler2 = refCount -> {};
+
+      assertNull(b.setEventHandler(handler));
+      assertEquals(handler, b.setEventHandler(null));
+
+      assertNull(b.setEventHandler(handler2));
+      assertEquals(handler2, b.setEventHandler(handler));
+    }
+  }
 }

From 825f0491cb07dec70cda20865c0f2e3fd3f984f3 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Mon, 14 Nov 2022 10:44:38 +0000
Subject: [PATCH 160/202] Fix singleton-range `__setitem__` edge case (#12075)

When trying to set a length-one range with a length-one array, an off-by-one error in `copying.copy_range` meant that the value was discarded. Fix that, and tidy up the semantics of `copy_range` a little while we're here. Closes #12073.

Authors:
  - Lawrence Mitchell (https://github.com/wence-)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/12075
---
 python/cudf/cudf/_lib/copying.pyx      | 41 +++++++++++++-------------
 python/cudf/cudf/tests/test_setitem.py | 10 +++++++
 2 files changed, 31 insertions(+), 20 deletions(-)

diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx
index d9a7a5b8754..1de91e6a3e9 100644
--- a/python/cudf/cudf/_lib/copying.pyx
+++ b/python/cudf/cudf/_lib/copying.pyx
@@ -132,36 +132,37 @@ def _copy_range(Column input_column,
     return Column.from_unique_ptr(move(c_result))
 
 
-def copy_range(Column input_column,
+def copy_range(Column source_column,
                Column target_column,
-               size_type input_begin,
-               size_type input_end,
+               size_type source_begin,
+               size_type source_end,
                size_type target_begin,
                size_type target_end,
                bool inplace):
     """
-    Copy input_column from input_begin to input_end to
-    target_column from target_begin to target_end
-    """
-
-    if abs(target_end - target_begin) <= 1:
-        return target_column
+    Copy a contiguous range from a source to a target column
 
-    if target_begin < 0:
-        target_begin = target_begin + target_column.size
-
-    if target_end < 0:
-        target_end = target_end + target_column.size
+    Notes
+    -----
+    Expects the source and target ranges to have been sanitised to be
+    in-range for the source and target column respectively. For
+    example via ``slice.indices``.
+    """
 
-    if target_begin > target_end:
+    assert source_end - source_begin == target_end - target_begin, (
+        "Source and target ranges must be same length"
+    )
+    if target_end <= target_begin and inplace:
+        # Empty target range: there is nothing to copy.
+        # FIXME: Are we allowed to do this when inplace=False?
+        return target_column
 
-    if inplace is True:
-        _copy_range_in_place(input_column, target_column,
-                             input_begin, input_end, target_begin)
+    if inplace:
+        _copy_range_in_place(source_column, target_column,
+                             source_begin, source_end, target_begin)
     else:
-        return _copy_range(input_column, target_column,
-                           input_begin, input_end, target_begin)
+        return _copy_range(source_column, target_column,
+                           source_begin, source_end, target_begin)
 
 
 def gather(
diff --git a/python/cudf/cudf/tests/test_setitem.py b/python/cudf/cudf/tests/test_setitem.py
index ac9dbecda65..0298a62b9d2 100644
--- a/python/cudf/cudf/tests/test_setitem.py
+++ b/python/cudf/cudf/tests/test_setitem.py
@@ -108,6 +108,16 @@ def test_series_set_item(psr, arg):
     assert_eq(psr, gsr)
 
 
+def test_series_setitem_singleton_range():
+    sr = cudf.Series([1, 2, 3], dtype=np.int64)
+    psr = sr.to_pandas()
+    value = np.asarray([7], dtype=np.int64)
+    sr.iloc[:1] = value
+    psr.iloc[:1] = value
+    assert_eq(sr, cudf.Series([7, 2, 3], dtype=np.int64))
+    assert_eq(sr, psr, check_dtype=True)
+
+
 @pytest.mark.parametrize(
     "df",
     [

From 5081fb11c1755cf460396fa6745ed749cb3fc4ec Mon Sep 17 00:00:00 2001
From: "Richard (Rick) Zamora" <rzamora217@gmail.com>
Date: Mon, 14 Nov 2022 12:05:04 -0600
Subject: [PATCH 161/202] Enable automatic column projection in groupby().agg
 (#12124)

This PR corresponds to the Dask-cudf version of https://github.com/dask/dask/pull/9442, which was found to improve the performance of many groupby-based workflows. After this PR,

```python
import dask_cudf

path = "/criteo-dataset/day_0.parquet"
ddf = dask_cudf.read_parquet(path, split_row_groups=10)

# The following takes <2s with this PR, and fails with
# an OOM error on main (using a 32GB GPU):
ddf.groupby("C1").agg({"C2": "mean"}).compute()
```

Authors:
  - Richard (Rick) Zamora (https://github.com/rjzamora)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/12124
---
 python/dask_cudf/dask_cudf/groupby.py            | 7 ++++++-
 python/dask_cudf/dask_cudf/tests/test_groupby.py | 4 ++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/python/dask_cudf/dask_cudf/groupby.py b/python/dask_cudf/dask_cudf/groupby.py
index 54f8958c9eb..a56f70e7ae2 100644
--- a/python/dask_cudf/dask_cudf/groupby.py
+++ b/python/dask_cudf/dask_cudf/groupby.py
@@ -685,8 +685,13 @@ def groupby_agg(
             "with `sort=False`, or set `shuffle=True`."
         )
 
+    # Determine required columns to enable column projection
+    required_columns = list(
+        set(gb_cols).union(aggs.keys()).intersection(ddf.columns)
+    )
+
     return aca(
-        [ddf],
+        [ddf[required_columns]],
         chunk=chunk,
         chunk_kwargs=chunk_kwargs,
         combine=combine,
diff --git a/python/dask_cudf/dask_cudf/tests/test_groupby.py b/python/dask_cudf/dask_cudf/tests/test_groupby.py
index e43fead0b63..1f018e79ff7 100644
--- a/python/dask_cudf/dask_cudf/tests/test_groupby.py
+++ b/python/dask_cudf/dask_cudf/tests/test_groupby.py
@@ -130,6 +130,10 @@ def test_groupby_agg(func, aggregation, pdf):
 
     assert_cudf_groupby_layers(actual)
 
+    # groupby.agg should add an explicit getitem layer
+    # to improve/enable column projection
+    assert hlg_layer(actual.dask, "getitem")
+
     dd.assert_eq(expect, actual, check_names=False, check_dtype=check_dtype)
 
 
From b20a6e60543ccf2596cdf2c12ef944206cf3b18a Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Mon, 14 Nov 2022 15:20:07 -0600
Subject: [PATCH 162/202] Add support for `DataFrame.from_dict`\`to_dict` and
 `Series.to_dict` (#12048)

Resolves: #11934

- [x] Adds `DataFrame.from_dict` and `DataFrame.to_dict`
- [x] Adds `Series.to_dict`

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Lawrence Mitchell (https://github.com/wence-)

URL: https://github.com/rapidsai/cudf/pull/12048
---
 docs/cudf/source/api_docs/dataframe.rst  |   2 +
 docs/cudf/source/api_docs/series.rst     |   1 +
 python/cudf/cudf/core/dataframe.py       | 249 +++++++++++++++++++++++
 python/cudf/cudf/core/frame.py           |  12 +-
 python/cudf/cudf/core/indexed_frame.py   |   7 -
 python/cudf/cudf/core/series.py          |  39 ++++
 python/cudf/cudf/tests/test_dataframe.py | 182 +++++++++++++++--
 python/cudf/cudf/tests/test_series.py    |  12 ++
 python/dask_cudf/dask_cudf/backends.py   |  20 +-
 9 files changed, 486 insertions(+), 38 deletions(-)

diff --git a/docs/cudf/source/api_docs/dataframe.rst b/docs/cudf/source/api_docs/dataframe.rst
index 609ef9ea0b9..db3bde8cca0 100644
--- a/docs/cudf/source/api_docs/dataframe.rst
+++ b/docs/cudf/source/api_docs/dataframe.rst
@@ -250,10 +250,12 @@ Serialization / IO / conversion
    :toctree: api/
 
    DataFrame.from_arrow
+   DataFrame.from_dict
    DataFrame.from_pandas
    DataFrame.from_records
    DataFrame.hash_values
    DataFrame.to_arrow
+   DataFrame.to_dict
    DataFrame.to_dlpack
    DataFrame.to_parquet
    DataFrame.to_csv
diff --git a/docs/cudf/source/api_docs/series.rst b/docs/cudf/source/api_docs/series.rst
index c721ed980ea..0b2a58b2f87 100644
--- a/docs/cudf/source/api_docs/series.rst
+++ b/docs/cudf/source/api_docs/series.rst
@@ -368,6 +368,7 @@ Serialization / IO / conversion
 
    Series.to_arrow
    Series.to_cupy
+   Series.to_dict
    Series.to_dlpack
    Series.to_frame
    Series.to_hdf
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index b7ec10fee2c..99fcac57306 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1992,6 +1992,247 @@ def _make_operands_and_index_for_binop(
                     operands[k] = (left_default, v, reflect, None)
         return operands, index
 
+    @classmethod
+    @_cudf_nvtx_annotate
+    def from_dict(
+        cls,
+        data: dict,
+        orient: str = "columns",
+        dtype: Dtype = None,
+        columns: list = None,
+    ) -> DataFrame:
+        """
+        Construct DataFrame from dict of array-like or dicts.
+        Creates DataFrame object from dictionary by columns or by index
+        allowing dtype specification.
+
+        Parameters
+        ----------
+        data : dict
+            Of the form {field : array-like} or {field : dict}.
+        orient : {'columns', 'index', 'tight'}, default 'columns'
+            The "orientation" of the data. If the keys of the passed dict
+            should be the columns of the resulting DataFrame, pass 'columns'
+            (default). Otherwise if the keys should be rows, pass 'index'.
+            If 'tight', assume a dict with keys ['index', 'columns', 'data',
+            'index_names', 'column_names'].
+        dtype : dtype, default None
+            Data type to force, otherwise infer.
+        columns : list, default None
+            Column labels to use when ``orient='index'``. Raises a ``ValueError``
+            if used with ``orient='columns'`` or ``orient='tight'``.
+
+        Returns
+        -------
+        DataFrame
+
+        See Also
+        --------
+        DataFrame.from_records : DataFrame from structured ndarray, sequence
+            of tuples or dicts, or DataFrame.
+        DataFrame : DataFrame object creation using constructor.
+        DataFrame.to_dict : Convert the DataFrame to a dictionary.
+
+        Examples
+        --------
+        By default the keys of the dict become the DataFrame columns:
+
+        >>> import cudf
+        >>> data = {'col_1': [3, 2, 1, 0], 'col_2': ['a', 'b', 'c', 'd']}
+        >>> cudf.DataFrame.from_dict(data)
+           col_1 col_2
+        0      3     a
+        1      2     b
+        2      1     c
+        3      0     d
+
+        Specify ``orient='index'`` to create the DataFrame using dictionary
+        keys as rows:
+
+        >>> data = {'row_1': [3, 2, 1, 0], 'row_2': [10, 11, 12, 13]}
+        >>> cudf.DataFrame.from_dict(data, orient='index')
+                0   1   2   3
+        row_1   3   2   1   0
+        row_2  10  11  12  13
+
+        When using the 'index' orientation, the column names can be
+        specified manually:
+
+        >>> cudf.DataFrame.from_dict(data, orient='index',
+        ...                          columns=['A', 'B', 'C', 'D'])
+                A   B   C   D
+        row_1   3   2   1   0
+        row_2  10  11  12  13
+
+        Specify ``orient='tight'`` to create the DataFrame using a 'tight'
+        format:
+
+        >>> data = {'index': [('a', 'b'), ('a', 'c')],
+        ...         'columns': [('x', 1), ('y', 2)],
+        ...         'data': [[1, 3], [2, 4]],
+        ...         'index_names': ['n1', 'n2'],
+        ...         'column_names': ['z1', 'z2']}
+        >>> cudf.DataFrame.from_dict(data, orient='tight')
+        z1     x  y
+        z2     1  2
+        n1 n2
+        a  b   1  3
+           c   2  4
+        """  # noqa: E501
+
+        orient = orient.lower()
+        if orient == "index":
+            if len(data) > 0 and isinstance(
+                next(iter(data.values())), (cudf.Series, cupy.ndarray)
+            ):
+                result = cls(data).T
+                result.columns = columns
+                if dtype is not None:
+                    result = result.astype(dtype)
+                return result
+            else:
+                return cls.from_pandas(
+                    pd.DataFrame.from_dict(
+                        data=data,
+                        orient=orient,
+                        dtype=dtype,
+                        columns=columns,
+                    )
+                )
+        elif orient == "columns":
+            if columns is not None:
+                raise ValueError(
+                    "Cannot use columns parameter with orient='columns'"
+                )
+            return cls(data, columns=None, dtype=dtype)
+        elif orient == "tight":
+            if columns is not None:
+                raise ValueError(
+                    "Cannot use columns parameter with orient='tight'"
+                )
+
+            index = _from_dict_create_index(
+                data["index"], data["index_names"], cudf
+            )
+            columns = _from_dict_create_index(
+                data["columns"], data["column_names"], pd
+            )
+            return cls(data["data"], index=index, columns=columns, dtype=dtype)
+        else:
+            raise ValueError(
+                "Expected 'index', 'columns' or 'tight' for orient "
+                f"parameter. Got '{orient}' instead"
+            )
+
+    @_cudf_nvtx_annotate
+    def to_dict(
+        self,
+        orient: str = "dict",
+        into: type[dict] = dict,
+    ) -> dict | list[dict]:
+        """
+        Convert the DataFrame to a dictionary.
+
+        The type of the key-value pairs can be customized with the parameters
+        (see below).
+
+        Parameters
+        ----------
+        orient : str {'dict', 'list', 'series', 'split', 'tight', 'records', 'index'}
+            Determines the type of the values of the dictionary.
+
+            - 'dict' (default) : dict like {column -> {index -> value}}
+            - 'list' : dict like {column -> [values]}
+            - 'series' : dict like {column -> Series(values)}
+            - 'split' : dict like
+              {'index' -> [index], 'columns' -> [columns], 'data' -> [values]}
+            - 'tight' : dict like
+              {'index' -> [index], 'columns' -> [columns], 'data' -> [values],
+              'index_names' -> [index.names], 'column_names' -> [column.names]}
+            - 'records' : list like
+              [{column -> value}, ... , {column -> value}]
+            - 'index' : dict like {index -> {column -> value}}
+            Abbreviations are allowed. `s` indicates `series` and `sp`
+            indicates `split`.
+
+        into : class, default dict
+            The collections.abc.Mapping subclass used for all Mappings
+            in the return value.  Can be the actual class or an empty
+            instance of the mapping type you want.  If you want a
+            collections.defaultdict, you must pass it initialized.
+
+        Returns
+        -------
+        dict, list or collections.abc.Mapping
+            Return a collections.abc.Mapping object representing the DataFrame.
+            The resulting transformation depends on the `orient` parameter.
+
+        See Also
+        --------
+        DataFrame.from_dict: Create a DataFrame from a dictionary.
+        DataFrame.to_json: Convert a DataFrame to JSON format.
+
+        Examples
+        --------
+        >>> import cudf
+        >>> df = cudf.DataFrame({'col1': [1, 2],
+        ...                      'col2': [0.5, 0.75]},
+        ...                     index=['row1', 'row2'])
+        >>> df
+              col1  col2
+        row1     1  0.50
+        row2     2  0.75
+        >>> df.to_dict()
+        {'col1': {'row1': 1, 'row2': 2}, 'col2': {'row1': 0.5, 'row2': 0.75}}
+
+        You can specify the return orientation.
+
+        >>> df.to_dict('series')
+        {'col1': row1    1
+                 row2    2
+        Name: col1, dtype: int64,
+        'col2': row1    0.50
+                row2    0.75
+        Name: col2, dtype: float64}
+
+        >>> df.to_dict('split')
+        {'index': ['row1', 'row2'], 'columns': ['col1', 'col2'],
+         'data': [[1, 0.5], [2, 0.75]]}
+
+        >>> df.to_dict('records')
+        [{'col1': 1, 'col2': 0.5}, {'col1': 2, 'col2': 0.75}]
+
+        >>> df.to_dict('index')
+        {'row1': {'col1': 1, 'col2': 0.5}, 'row2': {'col1': 2, 'col2': 0.75}}
+
+        >>> df.to_dict('tight')
+        {'index': ['row1', 'row2'], 'columns': ['col1', 'col2'],
+         'data': [[1, 0.5], [2, 0.75]], 'index_names': [None], 'column_names': [None]}
+
+        You can also specify the mapping type.
+
+        >>> from collections import OrderedDict, defaultdict
+        >>> df.to_dict(into=OrderedDict)
+        OrderedDict([('col1', OrderedDict([('row1', 1), ('row2', 2)])),
+                     ('col2', OrderedDict([('row1', 0.5), ('row2', 0.75)]))])
+
+        If you want a `defaultdict`, you need to initialize it:
+
+        >>> dd = defaultdict(list)
+        >>> df.to_dict('records', into=dd)
+        [defaultdict(<class 'list'>, {'col1': 1, 'col2': 0.5}),
+         defaultdict(<class 'list'>, {'col1': 2, 'col2': 0.75})]
+        """  # noqa: E501
+        orient = orient.lower()
+
+        if orient == "series":
+            # Special case needed to avoid converting
+            # cudf.Series objects into pd.Series
+            into_c = pd.core.common.standardize_mapping(into)
+            return into_c((k, v) for k, v in self.items())
+
+        return self.to_pandas().to_dict(orient=orient, into=into)
+
     @_cudf_nvtx_annotate
     def scatter_by_map(
         self, map_index, map_size=None, keep_index=True, **kwargs
@@ -7444,3 +7685,11 @@ def _reassign_categories(categories, cols, col_idxs):
                 offset=cols[name].offset,
                 size=cols[name].size,
             )
+
+
+def _from_dict_create_index(indexlist, namelist, library):
+    if len(namelist) > 1:
+        index = library.MultiIndex.from_tuples(indexlist, names=namelist)
+    else:
+        index = library.Index(indexlist, name=namelist[0])
+    return index
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 29d5c9ae26d..687338f882d 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -321,15 +321,19 @@ def __len__(self):
 
     @_cudf_nvtx_annotate
     def astype(self, dtype, copy=False, **kwargs):
-        result = {}
+        result_data = {}
         for col_name, col in self._data.items():
             dt = dtype.get(col_name, col.dtype)
             if not is_dtype_equal(dt, col.dtype):
-                result[col_name] = col.astype(dt, copy=copy, **kwargs)
+                result_data[col_name] = col.astype(dt, copy=copy, **kwargs)
             else:
-                result[col_name] = col.copy() if copy else col
+                result_data[col_name] = col.copy() if copy else col
 
-        return result
+        return ColumnAccessor._create_unsafe(
+            data=result_data,
+            multiindex=self._data.multiindex,
+            level_names=self._data.level_names,
+        )
 
     @_cudf_nvtx_annotate
     def equals(self, other):
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 49f7101183e..28039366725 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -270,13 +270,6 @@ def __init__(self, data=None, index=None):
         # to ensure that this constructor is always invoked with an index.
         self._index = index
 
-    def to_dict(self, *args, **kwargs):  # noqa: D102
-        raise TypeError(
-            "cuDF does not support conversion to host memory "
-            "via `to_dict()` method. Consider using "
-            "`.to_pandas().to_dict()` to construct a Python dictionary."
-        )
-
     @property
     def _num_rows(self) -> int:
         # Important to use the index because the data may be empty.
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 8c30ae258db..873bebf1292 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -731,6 +731,45 @@ def drop(
             labels, axis, index, columns, level, inplace, errors
         )
 
+    @_cudf_nvtx_annotate
+    def to_dict(self, into: type[dict] = dict) -> dict:
+        """
+        Convert Series to {label -> value} dict or dict-like object.
+
+        Parameters
+        ----------
+        into : class, default dict
+            The collections.abc.Mapping subclass to use as the return
+            object. Can be the actual class or an empty
+            instance of the mapping type you want.  If you want a
+            collections.defaultdict, you must pass it initialized.
+
+        Returns
+        -------
+        collections.abc.Mapping
+            Key-value representation of Series.
+
+        Examples
+        --------
+        >>> import cudf
+        >>> s = cudf.Series([1, 2, 3, 4])
+        >>> s
+        0    1
+        1    2
+        2    3
+        3    4
+        dtype: int64
+        >>> s.to_dict()
+        {0: 1, 1: 2, 2: 3, 3: 4}
+        >>> from collections import OrderedDict, defaultdict
+        >>> s.to_dict(OrderedDict)
+        OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
+        >>> dd = defaultdict(list)
+        >>> s.to_dict(dd)
+        defaultdict(<class 'list'>, {0: 1, 1: 2, 2: 3, 3: 4})
+        """
+        return self.to_pandas().to_dict(into=into)
+
     @_cudf_nvtx_annotate
     def append(self, to_append, ignore_index=False, verify_integrity=False):
         """Append values from another ``Series`` or array-like object.
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 58bee95326f..4ec770e0d6b 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -9,6 +9,7 @@
 import string
 import textwrap
 import warnings
+from collections import OrderedDict, defaultdict
 from contextlib import contextmanager
 from copy import copy
 
@@ -6818,27 +6819,172 @@ def test_cudf_isclose_different_index():
     assert_eq(expected, cudf.isclose(s1, s2))
 
 
-def test_dataframe_to_dict_error():
-    df = cudf.DataFrame({"a": [1, 2, 3], "b": [9, 5, 3]})
-    with pytest.raises(
-        TypeError,
-        match=re.escape(
-            r"cuDF does not support conversion to host memory "
-            r"via `to_dict()` method. Consider using "
-            r"`.to_pandas().to_dict()` to construct a Python dictionary."
+@pytest.mark.parametrize(
+    "orient", ["dict", "list", "split", "tight", "records", "index", "series"]
+)
+@pytest.mark.parametrize("into", [dict, OrderedDict, defaultdict(list)])
+def test_dataframe_to_dict(orient, into):
+    df = cudf.DataFrame({"a": [1, 2, 3], "b": [9, 5, 3]}, index=[10, 11, 12])
+    pdf = df.to_pandas()
+
+    actual = df.to_dict(orient=orient, into=into)
+    expected = pdf.to_dict(orient=orient, into=into)
+    if orient == "series":
+        assert actual.keys() == expected.keys()
+        for key in actual.keys():
+            assert_eq(expected[key], actual[key])
+    else:
+        assert_eq(expected, actual)
+
+
+@pytest.mark.parametrize(
+    "data, orient, dtype, columns",
+    [
+        (
+            {"col_1": [3, 2, 1, 0], "col_2": [3, 2, 1, 0]},
+            "columns",
+            None,
+            None,
         ),
-    ):
-        df.to_dict()
+        ({"col_1": [3, 2, 1, 0], "col_2": [3, 2, 1, 0]}, "index", None, None),
+        (
+            {"col_1": [None, 2, 1, 0], "col_2": [3, None, 1, 0]},
+            "index",
+            None,
+            ["A", "B", "C", "D"],
+        ),
+        (
+            {
+                "col_1": ["ab", "cd", "ef", "gh"],
+                "col_2": ["zx", "one", "two", "three"],
+            },
+            "index",
+            None,
+            ["A", "B", "C", "D"],
+        ),
+        (
+            {
+                "index": [("a", "b"), ("a", "c")],
+                "columns": [("x", 1), ("y", 2)],
+                "data": [[1, 3], [2, 4]],
+                "index_names": ["n1", "n2"],
+                "column_names": ["z1", "z2"],
+            },
+            "tight",
+            "float64",
+            None,
+        ),
+    ],
+)
+def test_dataframe_from_dict(data, orient, dtype, columns):
 
-    with pytest.raises(
-        TypeError,
-        match=re.escape(
-            r"cuDF does not support conversion to host memory "
-            r"via `to_dict()` method. Consider using "
-            r"`.to_pandas().to_dict()` to construct a Python dictionary."
+    expected = pd.DataFrame.from_dict(
+        data=data, orient=orient, dtype=dtype, columns=columns
+    )
+
+    actual = cudf.DataFrame.from_dict(
+        data=data, orient=orient, dtype=dtype, columns=columns
+    )
+
+    assert_eq(expected, actual)
+
+
+@pytest.mark.parametrize("dtype", ["int64", "str", None])
+def test_dataframe_from_dict_transposed(dtype):
+    pd_data = {"a": [3, 2, 1, 0], "col_2": [3, 2, 1, 0]}
+    gd_data = {key: cudf.Series(val) for key, val in pd_data.items()}
+    # Device-backed values (Series, then cupy arrays) must match pandas.
+    expected = pd.DataFrame.from_dict(pd_data, orient="index", dtype=dtype)
+    actual = cudf.DataFrame.from_dict(gd_data, orient="index", dtype=dtype)
+    assert_eq(expected, actual)
+    gd_data = {key: cupy.asarray(val) for key, val in pd_data.items()}
+    actual = cudf.DataFrame.from_dict(gd_data, orient="index", dtype=dtype)
+    assert_eq(expected, actual)
+
+
+@pytest.mark.parametrize(
+    "pd_data, gd_data, orient, dtype, columns",
+    [
+        (
+            {"col_1": np.array([3, 2, 1, 0]), "col_2": np.array([3, 2, 1, 0])},
+            {
+                "col_1": cupy.array([3, 2, 1, 0]),
+                "col_2": cupy.array([3, 2, 1, 0]),
+            },
+            "columns",
+            None,
+            None,
         ),
-    ):
-        df["a"].to_dict()
+        (
+            {"col_1": np.array([3, 2, 1, 0]), "col_2": np.array([3, 2, 1, 0])},
+            {
+                "col_1": cupy.array([3, 2, 1, 0]),
+                "col_2": cupy.array([3, 2, 1, 0]),
+            },
+            "index",
+            None,
+            None,
+        ),
+        (
+            {
+                "col_1": np.array([None, 2, 1, 0]),
+                "col_2": np.array([3, None, 1, 0]),
+            },
+            {
+                "col_1": cupy.array([np.nan, 2, 1, 0]),
+                "col_2": cupy.array([3, np.nan, 1, 0]),
+            },
+            "index",
+            None,
+            ["A", "B", "C", "D"],
+        ),
+        (
+            {
+                "col_1": np.array(["ab", "cd", "ef", "gh"]),
+                "col_2": np.array(["zx", "one", "two", "three"]),
+            },
+            {
+                "col_1": np.array(["ab", "cd", "ef", "gh"]),
+                "col_2": np.array(["zx", "one", "two", "three"]),
+            },
+            "index",
+            None,
+            ["A", "B", "C", "D"],
+        ),
+        (
+            {
+                "index": [("a", "b"), ("a", "c")],
+                "columns": [("x", 1), ("y", 2)],
+                "data": [np.array([1, 3]), np.array([2, 4])],
+                "index_names": ["n1", "n2"],
+                "column_names": ["z1", "z2"],
+            },
+            {
+                "index": [("a", "b"), ("a", "c")],
+                "columns": [("x", 1), ("y", 2)],
+                "data": [cupy.array([1, 3]), cupy.array([2, 4])],
+                "index_names": ["n1", "n2"],
+                "column_names": ["z1", "z2"],
+            },
+            "tight",
+            "float64",
+            None,
+        ),
+    ],
+)
+def test_dataframe_from_dict_cp_np_arrays(
+    pd_data, gd_data, orient, dtype, columns
+):
+
+    expected = pd.DataFrame.from_dict(
+        data=pd_data, orient=orient, dtype=dtype, columns=columns
+    )
+
+    actual = cudf.DataFrame.from_dict(
+        data=gd_data, orient=orient, dtype=dtype, columns=columns
+    )
+
+    assert_eq(expected, actual, check_dtype=dtype is not None)
 
 
 @pytest.mark.parametrize(
diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py
index d5af2899bb0..c902bcb8b47 100644
--- a/python/cudf/cudf/tests/test_series.py
+++ b/python/cudf/cudf/tests/test_series.py
@@ -3,6 +3,7 @@
 import hashlib
 import operator
 import re
+from collections import OrderedDict, defaultdict
 from string import ascii_letters, digits
 
 import cupy as cp
@@ -1992,3 +1993,14 @@ def test_set_bool_error(dtype, bool_scalar):
         lfunc_args_and_kwargs=([bool_scalar],),
         rfunc_args_and_kwargs=([bool_scalar],),
     )
+
+
+@pytest.mark.parametrize("into", [dict, OrderedDict, defaultdict(list)])
+def test_series_to_dict(into):
+    gs = cudf.Series(["ab", "de", "zx"], index=[10, 20, 100])
+    ps = gs.to_pandas()
+
+    actual = gs.to_dict(into=into)
+    expected = ps.to_dict(into=into)
+
+    assert_eq(expected, actual)
diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py
index 58f3d807f51..49b5e725fed 100644
--- a/python/dask_cudf/dask_cudf/backends.py
+++ b/python/dask_cudf/dask_cudf/backends.py
@@ -469,16 +469,18 @@ class CudfBackendEntrypoint(DataFrameBackendEntrypoint):
         """
 
         @staticmethod
-        def from_dict(data, npartitions, orient="columns", **kwargs):
-            from dask_cudf import from_cudf
-
-            if orient != "columns":
-                raise ValueError(f"orient={orient} is not supported")
-            # TODO: Use cudf.from_dict
-            # (See: https://github.com/rapidsai/cudf/issues/11934)
-            return from_cudf(
-                cudf.DataFrame(data),
+        def from_dict(
+            data, npartitions, orient="columns", dtype=None, columns=None
+        ):
+
+            return _default_backend(
+                dd.from_dict,
+                data,
                 npartitions=npartitions,
+                orient=orient,
+                dtype=dtype,
+                columns=columns,
+                constructor=cudf.DataFrame,
             )
 
         @staticmethod

From b2e5069dce4cc99b229ddf77e480c9fd89e01ceb Mon Sep 17 00:00:00 2001
From: Vukasin Milovanovic <vmilovanovic@nvidia.com>
Date: Mon, 14 Nov 2022 21:38:55 -0800
Subject: [PATCH 163/202] Create an `int8` column in `read_csv` when all
 elements are missing (#12110)

CSV reader creates int8 columns when all elements are null. However, when all elements in a
column are missing (e.g. `names` option includes more columns than the CSV file), CSV reader creates an `int64` column. Such columns take up a lot more device memory.

This PR changes the behavior so that all columns with no valid elements are created as `int8`.

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - Mike Wilson (https://github.com/hyperbolic2346)
  - Yunsong Wang (https://github.com/PointKernel)
  - Mark Harris (https://github.com/harrism)

URL: https://github.com/rapidsai/cudf/pull/12110
---
 cpp/src/io/csv/reader_impl.cu                 |  3 +--
 .../io/utilities/column_type_histogram.hpp    |  5 +++++
 cpp/tests/io/csv_test.cpp                     | 21 +++++++++++++++++++
 3 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/cpp/src/io/csv/reader_impl.cu b/cpp/src/io/csv/reader_impl.cu
index 9da56b9bef8..f812f272c25 100644
--- a/cpp/src/io/csv/reader_impl.cu
+++ b/cpp/src/io/csv/reader_impl.cu
@@ -538,8 +538,7 @@ void infer_column_types(parse_options const& parse_opts,
     auto const& stats = column_stats[inf_col_idx++];
     unsigned long long int_count_total =
       stats.big_int_count + stats.negative_small_int_count + stats.positive_small_int_count;
-
-    if (stats.null_count == num_records) {
+    if (stats.null_count == num_records or stats.total_count() == 0) {
       // Entire column is NULL; allocate the smallest amount of memory
       column_types[col_idx] = data_type(cudf::type_id::INT8);
     } else if (stats.string_count > 0L) {
diff --git a/cpp/src/io/utilities/column_type_histogram.hpp b/cpp/src/io/utilities/column_type_histogram.hpp
index 8bd2d3a89cf..88f4e58f9b1 100644
--- a/cpp/src/io/utilities/column_type_histogram.hpp
+++ b/cpp/src/io/utilities/column_type_histogram.hpp
@@ -33,6 +33,11 @@ struct column_type_histogram {
   cudf::size_type positive_small_int_count{};
   cudf::size_type big_int_count{};
   cudf::size_type bool_count{};
+  auto total_count() const
+  {
+    return null_count + float_count + datetime_count + string_count + negative_small_int_count +
+           positive_small_int_count + big_int_count + bool_count;
+  }
 };
 
 }  // namespace io
diff --git a/cpp/tests/io/csv_test.cpp b/cpp/tests/io/csv_test.cpp
index 8acc6f8f6ee..eeca87446ec 100644
--- a/cpp/tests/io/csv_test.cpp
+++ b/cpp/tests/io/csv_test.cpp
@@ -2244,6 +2244,27 @@ TEST_F(CsvReaderTest, CsvDefaultOptionsWriteReadMatch)
   EXPECT_EQ(new_table_and_metadata.metadata.column_names[1], "1");
 }
 
+TEST_F(CsvReaderTest, EmptyColumns)
+{
+  // First column has only empty fields; second column contains only "null" literals
+  std::string csv_in{",null\n,null"};
+
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{csv_in.c_str(), csv_in.size()})
+      .names({"a", "b", "c", "d"})
+      .header(-1);
+  // More elements in `names` than in the file; additional columns are filled with nulls
+  auto result = cudf::io::read_csv(in_opts);
+
+  const auto result_table = result.tbl->view();
+  EXPECT_EQ(result_table.num_columns(), 4);
+  // All columns should contain only nulls; expect INT8 type to use as little memory as possible
+  for (auto& column : result_table) {
+    EXPECT_EQ(column.type(), data_type{type_id::INT8});
+    EXPECT_EQ(column.null_count(), 2);
+  }
+}
+
 TEST_F(CsvReaderTest, BlankLineAfterFirstRow)
 {
   std::string csv_in{"12,9., 10\n\n"};

From fd488cd82f26761f76a13bfa807eae5b10093a28 Mon Sep 17 00:00:00 2001
From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com>
Date: Tue, 15 Nov 2022 21:51:23 +0530
Subject: [PATCH 164/202] Cleanup common parsing code in JSON, CSV reader
 (#12022)

This PR cleans up the parsing code shared by the nested JSON reader and the CSV reader.
- Uses `std::optional` to indicate parsing failure in `parse_numeric`
- Cleanup
  - Removed `decode_value`, as it only provides specializations for timestamp and duration types; all other types are passed through to `parse_numeric`.
  - Unified `decode_digit`

Depends on #11898 and #12021

Authors:
  - Karthikeyan (https://github.com/karthikeyann)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Vukasin Milovanovic (https://github.com/vuule)

URL: https://github.com/rapidsai/cudf/pull/12022
---
 cpp/src/io/utilities/parsing_utils.cuh | 154 ++++++++-----------------
 cpp/src/strings/json/json_path.cu      |   7 +-
 cpp/tests/io/json_test.cpp             | 112 ++++++++++++++++++
 3 files changed, 166 insertions(+), 107 deletions(-)

diff --git a/cpp/src/io/utilities/parsing_utils.cuh b/cpp/src/io/utilities/parsing_utils.cuh
index cbd417c2b5b..89806956ae5 100644
--- a/cpp/src/io/utilities/parsing_utils.cuh
+++ b/cpp/src/io/utilities/parsing_utils.cuh
@@ -117,8 +117,9 @@ struct parse_options {
 };
 
 /**
- * @brief Returns the numeric value of an ASCII/UTF-8 character. Specialization
- * for integral types. Handles hexadecimal digits, both uppercase and lowercase.
+ * @brief Returns the numeric value of an ASCII/UTF-8 character.
+ * Handles hexadecimal digits (uppercase and lowercase) only when `as_hex` is
+ * true and `T` is an integral type; otherwise only decimal digits are accepted.
  * If the character is not a valid numeric digit then `0` is returned and
  * valid_flag is set to false.
  *
@@ -127,31 +128,14 @@ struct parse_options {
  *
  * @return uint8_t Numeric value of the character, or `0`
  */
-template <typename T, CUDF_ENABLE_IF(std::is_integral_v<T>)>
-constexpr uint8_t decode_digit(char c, bool* valid_flag)
-{
-  if (c >= '0' && c <= '9') return c - '0';
-  if (c >= 'a' && c <= 'f') return c - 'a' + 10;
-  if (c >= 'A' && c <= 'F') return c - 'A' + 10;
-
-  *valid_flag = false;
-  return 0;
-}
-
-/**
- * @brief Returns the numeric value of an ASCII/UTF-8 character. Specialization
- * for non-integral types. Handles only decimal digits. If the character is not
- * a valid numeric digit then `0` is returned and valid_flag is set to false.
- *
- * @param c ASCII or UTF-8 character
- * @param valid_flag Set to false if input is not valid. Unchanged otherwise.
- *
- * @return uint8_t Numeric value of the character, or `0`
- */
-template <typename T, CUDF_ENABLE_IF(!std::is_integral_v<T>)>
+template <typename T, bool as_hex = false>
 constexpr uint8_t decode_digit(char c, bool* valid_flag)
 {
   if (c >= '0' && c <= '9') return c - '0';
+  if constexpr (as_hex and std::is_integral_v<T>) {
+    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
+    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
+  }
 
   *valid_flag = false;
   return 0;
@@ -194,13 +178,13 @@ constexpr bool is_infinity(char const* begin, char const* end)
  * @return The parsed and converted value
  */
 template <typename T, int base = 10>
-constexpr T parse_numeric(char const* begin,
-                          char const* end,
-                          parse_options_view const& opts,
-                          T error_result = std::numeric_limits<T>::quiet_NaN())
+__host__ __device__ std::optional<T> parse_numeric(char const* begin,
+                                                   char const* end,
+                                                   parse_options_view const& opts)
 {
   T value{};
   bool all_digits_valid = true;
+  constexpr bool as_hex = (base == 16);
 
   // Handle negative values if necessary
   int32_t sign = (*begin == '-') ? -1 : 1;
@@ -223,7 +207,7 @@ constexpr T parse_numeric(char const* begin,
     } else if (base == 10 && (*begin == 'e' || *begin == 'E')) {
       break;
     } else if (*begin != opts.thousands && *begin != '+') {
-      value = (value * base) + decode_digit<T>(*begin, &all_digits_valid);
+      value = (value * base) + decode_digit<T, as_hex>(*begin, &all_digits_valid);
     }
     ++begin;
   }
@@ -237,7 +221,7 @@ constexpr T parse_numeric(char const* begin,
         break;
       } else if (*begin != opts.thousands && *begin != '+') {
         divisor /= base;
-        value += decode_digit<T>(*begin, &all_digits_valid) * divisor;
+        value += decode_digit<T, as_hex>(*begin, &all_digits_valid) * divisor;
       }
       ++begin;
     }
@@ -248,12 +232,12 @@ constexpr T parse_numeric(char const* begin,
       if (*begin == '-' || *begin == '+') { ++begin; }
       int32_t exponent = 0;
       while (begin < end) {
-        exponent = (exponent * 10) + decode_digit<T>(*(begin++), &all_digits_valid);
+        exponent = (exponent * 10) + decode_digit<T, as_hex>(*(begin++), &all_digits_valid);
       }
       if (exponent != 0) { value *= exp10(double(exponent * exponent_sign)); }
     }
   }
-  if (!all_digits_valid) { return error_result; }
+  if (!all_digits_valid) { return std::optional<T>{}; }
 
   return value * sign;
 }
@@ -485,7 +469,7 @@ cudf::size_type count_all_from_set(host_span<char const> data,
 /**
  * @brief Checks whether the given character is a whitespace character.
  *
- * @param[in] ch The character to check
+ * @param ch The character to check
  *
  * @return True if the input is whitespace, False otherwise
  */
@@ -567,65 +551,6 @@ __inline__ __device__ std::pair<char const*, char const*> trim_quotes(char const
   return {begin, end};
 }
 
-/**
- * @brief Decodes a numeric value base on templated cudf type T with specified
- * base.
- *
- * @param[in] begin Beginning of the character string
- * @param[in] end End of the character string
- * @param opts The global parsing behavior options
- *
- * @return The parsed numeric value
- */
-template <typename T, int base>
-__inline__ __device__ T decode_value(char const* begin,
-                                     char const* end,
-                                     parse_options_view const& opts)
-{
-  return cudf::io::parse_numeric<T, base>(begin, end, opts);
-}
-
-/**
- * @brief Decodes a numeric value base on templated cudf type T
- *
- * @param[in] begin Beginning of the character string
- * @param[in] end End of the character string
- * @param opts The global parsing behavior options
- *
- * @return The parsed numeric value
- */
-template <typename T, CUDF_ENABLE_IF(!cudf::is_timestamp<T>() and !cudf::is_duration<T>())>
-__inline__ __device__ T decode_value(char const* begin,
-                                     char const* end,
-                                     parse_options_view const& opts)
-{
-  return cudf::io::parse_numeric<T>(begin, end, opts);
-}
-
-template <typename T, CUDF_ENABLE_IF(cudf::is_timestamp<T>())>
-__inline__ __device__ T decode_value(char const* begin,
-                                     char const* end,
-                                     parse_options_view const& opts)
-{
-  // If this is a string value, remove quotes
-  if ((thrust::distance(begin, end) >= 2 && *begin == '\"' && *thrust::prev(end) == '\"')) {
-    thrust::advance(begin, 1);
-    thrust::advance(end, -1);
-  }
-  return to_timestamp<T>(begin, end, opts.dayfirst);
-}
-
-template <typename T, CUDF_ENABLE_IF(cudf::is_duration<T>())>
-__inline__ __device__ T decode_value(char const* begin, char const* end, parse_options_view const&)
-{
-  // If this is a string value, remove quotes
-  if ((thrust::distance(begin, end) >= 2 && *begin == '\"' && *thrust::prev(end) == '\"')) {
-    thrust::advance(begin, 1);
-    thrust::advance(end, -1);
-  }
-  return to_duration<T>(begin, end);
-}
-
 struct ConvertFunctor {
   /**
    * @brief Dispatch for numeric types whose values can be convertible to
@@ -645,13 +570,15 @@ struct ConvertFunctor {
                                                       parse_options_view const& opts,
                                                       bool as_hex = false)
   {
-    static_cast<T*>(out_buffer)[row] = [as_hex, &opts, begin, end]() -> T {
+    auto const value = [as_hex, &opts, begin, end]() -> std::optional<T> {
       // Check for user-specified true/false values
       auto const field_len = static_cast<size_t>(end - begin);
       if (serialized_trie_contains(opts.trie_true, {begin, field_len})) { return 1; }
       if (serialized_trie_contains(opts.trie_false, {begin, field_len})) { return 0; }
-      return as_hex ? decode_value<T, 16>(begin, end, opts) : decode_value<T>(begin, end, opts);
+      return as_hex ? cudf::io::parse_numeric<T, 16>(begin, end, opts)
+                    : cudf::io::parse_numeric<T>(begin, end, opts);
     }();
+    static_cast<T*>(out_buffer)[row] = value.value_or(std::numeric_limits<T>::quiet_NaN());
 
     return true;
   }
@@ -670,6 +597,7 @@ struct ConvertFunctor {
                                                       parse_options_view const& opts,
                                                       bool as_hex)
   {
+    // TODO decide what's invalid input and update parsing functions
     static_cast<device_storage_type_t<T>*>(out_buffer)[row] =
       [&opts, output_type, begin, end]() -> device_storage_type_t<T> {
       return strings::detail::parse_decimal<device_storage_type_t<T>>(
@@ -691,13 +619,18 @@ struct ConvertFunctor {
                                                       parse_options_view const& opts,
                                                       bool as_hex)
   {
-    static_cast<T*>(out_buffer)[row] = [&opts, begin, end]() {
+    auto const value = [&opts, begin, end]() -> std::optional<T> {
       // Check for user-specified true/false values
       auto const field_len = static_cast<size_t>(end - begin);
-      if (serialized_trie_contains(opts.trie_true, {begin, field_len})) { return true; }
-      if (serialized_trie_contains(opts.trie_false, {begin, field_len})) { return false; }
-      return decode_value<T>(begin, end, opts);
+      if (serialized_trie_contains(opts.trie_true, {begin, field_len})) {
+        return static_cast<T>(true);
+      }
+      if (serialized_trie_contains(opts.trie_false, {begin, field_len})) {
+        return static_cast<T>(false);
+      }
+      return cudf::io::parse_numeric<T>(begin, end, opts);
     }();
+    static_cast<T*>(out_buffer)[row] = value.value_or(std::numeric_limits<T>::quiet_NaN());
 
     return true;
   }
@@ -715,10 +648,20 @@ struct ConvertFunctor {
                                                       parse_options_view const& opts,
                                                       bool as_hex)
   {
-    T const value                    = decode_value<T>(begin, end, opts);
-    static_cast<T*>(out_buffer)[row] = value;
+    auto const value = [&opts, begin, end]() -> std::optional<T> {
+      // Check for user-specified true/false values
+      auto const field_len = static_cast<size_t>(end - begin);
+      if (serialized_trie_contains(opts.trie_true, {begin, field_len})) {
+        return static_cast<T>(true);
+      }
+      if (serialized_trie_contains(opts.trie_false, {begin, field_len})) {
+        return static_cast<T>(false);
+      }
+      return cudf::io::parse_numeric<T>(begin, end, opts);
+    }();
+    static_cast<T*>(out_buffer)[row] = value.value_or(std::numeric_limits<T>::quiet_NaN());
 
-    return !std::isnan(value);
+    return value.has_value() and !std::isnan(*value);
   }
 
   /**
@@ -735,12 +678,15 @@ struct ConvertFunctor {
                                                       parse_options_view const& opts,
                                                       bool as_hex)
   {
-    if constexpr (cudf::is_timestamp<T>() or cudf::is_duration<T>()) {
-      static_cast<T*>(out_buffer)[row] = decode_value<T>(begin, end, opts);
-      return true;
+    // TODO decide what's invalid input and update parsing functions
+    if constexpr (cudf::is_timestamp<T>()) {
+      static_cast<T*>(out_buffer)[row] = to_timestamp<T>(begin, end, opts.dayfirst);
+    } else if constexpr (cudf::is_duration<T>()) {
+      static_cast<T*>(out_buffer)[row] = to_duration<T>(begin, end);
     } else {
       return false;
     }
+    return true;
   }
 };
 
diff --git a/cpp/src/strings/json/json_path.cu b/cpp/src/strings/json/json_path.cu
index 303c35ea7fb..5ce00eeee4a 100644
--- a/cpp/src/strings/json/json_path.cu
+++ b/cpp/src/strings/json/json_path.cu
@@ -570,9 +570,10 @@ class path_state : private parser {
               op.type          = path_operator_type::CHILD;
               op.expected_type = OBJECT;
             } else {
-              op.type  = path_operator_type::CHILD_INDEX;
-              op.index = cudf::io::parse_numeric<int>(
-                op.name.data(), op.name.data() + op.name.size_bytes(), json_opts, -1);
+              op.type          = path_operator_type::CHILD_INDEX;
+              auto const value = cudf::io::parse_numeric<int>(
+                op.name.data(), op.name.data() + op.name.size_bytes(), json_opts);
+              op.index = value.value_or(-1);
               CUDF_EXPECTS(op.index >= 0, "Invalid numeric index specified in JSONPath");
               op.expected_type = ARRAY;
             }
diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json_test.cpp
index b8cd4622484..f7b21008f70 100644
--- a/cpp/tests/io/json_test.cpp
+++ b/cpp/tests/io/json_test.cpp
@@ -18,6 +18,7 @@
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
 #include <cudf_test/cudf_gtest.hpp>
+#include <cudf_test/iterator_utilities.hpp>
 #include <cudf_test/table_utilities.hpp>
 #include <cudf_test/type_lists.hpp>
 
@@ -29,6 +30,7 @@
 #include <cudf/table/table_view.hpp>
 #include <cudf/types.hpp>
 
+#include <limits>
 #include <thrust/iterator/constant_iterator.h>
 
 #include <arrow/io/api.h>
@@ -1451,4 +1453,114 @@ TEST_F(JsonReaderTest, JsonNestedDtypeSchema)
                                  int_wrapper{{1, 1, 2}, {true, true, true}});
 }
 
+TEST_P(JsonReaderParamTest, JsonDtypeParsing)
+{
+  auto const test_opt          = GetParam();
+  bool const test_experimental = (test_opt == json_test_t::json_experimental_record_orient);
+  // All corner cases of dtype parsing
+  //  0, "0", " 0", 1, "1", " 1", "a", "z", null, true, false,  "null", "true", "false", nan, "nan"
+  // Test for dtypes: bool, int, float, str, duration, timestamp
+  std::string row_orient =
+    "[0]\n[\"0\"]\n[\" 0\"]\n[1]\n[\"1\"]\n[\" 1\"]\n[\"a\"]\n[\"z\"]\n"
+    "[null]\n[true]\n[false]\n[\"null\"]\n[\"true\"]\n[\"false\"]\n[nan]\n[\"nan\"]\n";
+  std::string record_orient = to_records_orient({{{"0", "0"}},
+                                                 {{"0", "\"0\""}},
+                                                 {{"0", "\" 0\""}},
+                                                 {{"0", "1"}},
+                                                 {{"0", "\"1\""}},
+                                                 {{"0", "\" 1\""}},
+                                                 {{"0", "\"a\""}},
+                                                 {{"0", "\"z\""}},
+                                                 {{"0", "null"}},
+                                                 {{"0", "true"}},
+                                                 {{"0", "false"}},
+                                                 {{"0", "\"null\""}},
+                                                 {{"0", "\"true\""}},
+                                                 {{"0", "\"false\""}},
+                                                 {{"0", "nan"}},
+                                                 {{"0", "\"nan\""}}},
+                                                "\n");
+
+  std::string data = (test_opt == json_test_t::json_lines_row_orient) ? row_orient : record_orient;
+
+  auto make_validity = [](std::vector<int> const& validity) {
+    return cudf::detail::make_counting_transform_iterator(
+      0, [=](auto i) -> bool { return static_cast<bool>(validity[i]); });
+  };
+
+  constexpr int int_NA       = 0;
+  constexpr double double_NA = std::numeric_limits<double>::quiet_NaN();
+  constexpr bool bool_NA     = false;
+
+  std::vector<int> const validity = {1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0};
+
+  auto int_col = int_wrapper{
+    {0, 0, int_NA, 1, 1, int_NA, int_NA, int_NA, int_NA, 1, 0, int_NA, 1, 0, int_NA, int_NA},
+    cudf::test::iterators::nulls_at(std::vector<int>{8})};
+  auto float_col = float_wrapper{{0.0,
+                                  0.0,
+                                  double_NA,
+                                  1.0,
+                                  1.0,
+                                  double_NA,
+                                  double_NA,
+                                  double_NA,
+                                  double_NA,
+                                  1.0,
+                                  0.0,
+                                  double_NA,
+                                  1.0,
+                                  0.0,
+                                  double_NA,
+                                  double_NA},
+                                 make_validity(validity)};
+  auto str_col =
+    cudf::test::strings_column_wrapper{// clang-format off
+    {"0", "0", " 0", "1", "1", " 1", "a", "z", "", "true", "false", "null", "true", "false", "nan", "nan"},
+     cudf::test::iterators::nulls_at(std::vector<int>{8})};
+  // clang-format on
+  auto bool_col = bool_wrapper{{false,
+                                false,
+                                bool_NA,
+                                true,
+                                true,
+                                bool_NA,
+                                bool_NA,
+                                bool_NA,
+                                bool_NA,
+                                true,
+                                false,
+                                bool_NA,
+                                true,
+                                false,
+                                bool_NA,
+                                bool_NA},
+                               cudf::test::iterators::nulls_at(std::vector<int>{8})};
+
+  // Types to test
+  const std::vector<data_type> dtypes = {
+    dtype<int32_t>(), dtype<float>(), dtype<cudf::string_view>(), dtype<bool>()};
+  const std::vector<cudf::column_view> cols{cudf::column_view(int_col),
+                                            cudf::column_view(float_col),
+                                            cudf::column_view(str_col),
+                                            cudf::column_view(bool_col)};
+  for (size_t col_type = 0; col_type < cols.size(); col_type++) {
+    std::map<std::string, cudf::io::schema_element> dtype_schema{{"0", {dtypes[col_type]}}};
+    cudf::io::json_reader_options in_options =
+      cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()})
+        .dtypes(dtype_schema)
+        .lines(true)
+        .experimental(test_experimental);
+
+    cudf::io::table_with_metadata result = cudf::io::read_json(in_options);
+
+    EXPECT_EQ(result.tbl->num_columns(), 1);
+    EXPECT_EQ(result.tbl->num_rows(), 16);
+    EXPECT_EQ(result.metadata.schema_info[0].name, "0");
+
+    EXPECT_EQ(result.tbl->get_column(0).type().id(), dtypes[col_type].id());
+    CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(0), cols[col_type]);
+  }
+}
+
 CUDF_TEST_PROGRAM_MAIN()

From bae9e39ffabb9e941d5b244e6ee0c716e69d8ca3 Mon Sep 17 00:00:00 2001
From: Robert Maynard <rmaynard@nvidia.com>
Date: Tue, 15 Nov 2022 12:34:02 -0500
Subject: [PATCH 165/202] Fix/disable jitify lto (#12122)

Remove the possibility of jit lto from cudf

Authors:
  - Robert Maynard (https://github.com/robertmaynard)

Approvers:
  - Mark Harris (https://github.com/harrism)
  - Paul Taylor (https://github.com/trxcllnt)

URL: https://github.com/rapidsai/cudf/pull/12122
---
 cpp/cmake/Modules/JitifyPreprocessKernels.cmake | 2 +-
 cpp/cmake/thirdparty/get_jitify.cmake           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake
index 0d52a064761..df285bdea55 100644
--- a/cpp/cmake/Modules/JitifyPreprocessKernels.cmake
+++ b/cpp/cmake/Modules/JitifyPreprocessKernels.cmake
@@ -39,7 +39,7 @@ function(jit_preprocess_files)
       VERBATIM
       COMMAND ${CMAKE_COMMAND} -E make_directory "${jit_output_directory}"
       COMMAND
-        "${CMAKE_COMMAND}" -E env LD_LIBRARY_PATH="${CUDAToolkit_LIBRARY_DIR}"
+        "${CMAKE_COMMAND}" -E env LD_LIBRARY_PATH=${CUDAToolkit_LIBRARY_DIR}
         $<TARGET_FILE:jitify_preprocess> ${ARG_FILE} -o
         ${CUDF_GENERATED_INCLUDE_DIR}/include/jit_preprocessed_files -i -m -std=c++17
         -remove-unused-globals -D_FILE_OFFSET_BITS=64 -D__CUDACC_RTC__ -I${CUDF_SOURCE_DIR}/include
diff --git a/cpp/cmake/thirdparty/get_jitify.cmake b/cpp/cmake/thirdparty/get_jitify.cmake
index b7c90952c95..d98abdf8824 100644
--- a/cpp/cmake/thirdparty/get_jitify.cmake
+++ b/cpp/cmake/thirdparty/get_jitify.cmake
@@ -18,7 +18,7 @@
 function(find_and_configure_jitify)
   rapids_cpm_find(
     jitify 2.0.0
-    GIT_REPOSITORY https://github.com/NVIDIA/jitify.git
+    GIT_REPOSITORY https://github.com/rapidsai/jitify.git
     GIT_TAG jitify2
     GIT_SHALLOW TRUE
     DOWNLOAD_ONLY TRUE

From 186e129c00813f7edec9643769bc7879abbb6ca4 Mon Sep 17 00:00:00 2001
From: "Robert (Bobby) Evans" <bobby@apache.org>
Date: Tue, 15 Nov 2022 13:26:38 -0600
Subject: [PATCH 166/202] Add in negative size checks for columns (#12118)

This fixes #12116
This just adds in a few checks for negative sizes to avoid any issues with rounding errors and also helps us detect errors sooner. It will not fix small negative allocations for device buffers directly.

Authors:
  - Robert (Bobby) Evans (https://github.com/revans2)

Approvers:
  - Karthikeyan (https://github.com/karthikeyann)
  - David Wendt (https://github.com/davidwendt)
  - Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/12118
---
 cpp/include/cudf/column/column.hpp           | 3 +++
 cpp/include/cudf/column/column_factories.hpp | 7 ++++++-
 cpp/src/column/column_factories.cpp          | 5 +++++
 3 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/cpp/include/cudf/column/column.hpp b/cpp/include/cudf/column/column.hpp
index 4f42910856f..c02991051d9 100644
--- a/cpp/include/cudf/column/column.hpp
+++ b/cpp/include/cudf/column/column.hpp
@@ -109,6 +109,8 @@ class column {
    * @note This constructor is primarily intended for use in column factory
    * functions.
    *
+   * @throws cudf::logic_error if `size < 0`
+   *
    * @param[in] dtype The element type
    * @param[in] size The number of elements in the column
    * @param[in] data The column's data
@@ -133,6 +135,7 @@ class column {
       _null_count{null_count},
       _children{std::move(children)}
   {
+    CUDF_EXPECTS(size >= 0, "Column size cannot be negative.");
   }
 
   /**
diff --git a/cpp/include/cudf/column/column_factories.hpp b/cpp/include/cudf/column/column_factories.hpp
index 85f4deecb1d..725faeae626 100644
--- a/cpp/include/cudf/column/column_factories.hpp
+++ b/cpp/include/cudf/column/column_factories.hpp
@@ -62,6 +62,7 @@ std::unique_ptr<column> make_empty_column(type_id id);
  *
  * @throws std::bad_alloc if device memory allocation fails
  * @throws cudf::logic_error if `type` is not a numeric type
+ * @throws cudf::logic_error if `size < 0`
  *
  * @param[in] type The desired numeric element type
  * @param[in] size The number of elements in the column
@@ -119,6 +120,7 @@ std::unique_ptr<column> make_numeric_column(
  * @note The column's null count is determined by the requested null mask `state`.
  *
  * @throws cudf::logic_error if `type` is not a `fixed_point` type.
+ * @throws cudf::logic_error if `size < 0`
  *
  * @param[in] type The desired `fixed_point` element type.
  * @param[in] size The number of elements in the column.
@@ -176,6 +178,7 @@ std::unique_ptr<column> make_fixed_point_column(
  *
  * @throws std::bad_alloc if device memory allocation fails
  * @throws cudf::logic_error if `type` is not a timestamp type
+ * @throws cudf::logic_error if `size < 0`
  *
  * @param[in] type The desired timestamp element type
  * @param[in] size The number of elements in the column
@@ -234,6 +237,7 @@ std::unique_ptr<column> make_timestamp_column(
  *
  * @throws std::bad_alloc if device memory allocation fails
  * @throws cudf::logic_error if `type` is not a duration type
+ * @throws cudf::logic_error if `size < 0`
  *
  * @param[in] type The desired duration element type
  * @param[in] size The number of elements in the column
@@ -292,6 +296,7 @@ std::unique_ptr<column> make_duration_column(
  *
  * @throws std::bad_alloc if device memory allocation fails
  * @throws cudf::logic_error if `type` is not a fixed width type
+ * @throws cudf::logic_error if `size < 0`
  *
  * @param[in] type The desired fixed width type
  * @param[in] size The number of elements in the column
@@ -366,7 +371,7 @@ std::unique_ptr<column> make_fixed_width_column(
  * @param[in] stream CUDA stream used for device memory operations and kernel launches.
  * @param[in] mr Device memory resource used for allocation of the column's `null_mask` and children
  * columns' device memory.
-  * @return Constructed strings column
+ * @return Constructed strings column
  */
 std::unique_ptr<column> make_strings_column(
   cudf::device_span<thrust::pair<const char*, size_type> const> strings,
diff --git a/cpp/src/column/column_factories.cpp b/cpp/src/column/column_factories.cpp
index 098e0d3e2cc..5f455e26e52 100644
--- a/cpp/src/column/column_factories.cpp
+++ b/cpp/src/column/column_factories.cpp
@@ -79,6 +79,7 @@ std::unique_ptr<column> make_numeric_column(data_type type,
 {
   CUDF_FUNC_RANGE();
   CUDF_EXPECTS(is_numeric(type), "Invalid, non-numeric type.");
+  CUDF_EXPECTS(size >= 0, "Column size cannot be negative.");
 
   return std::make_unique<column>(type,
                                   size,
@@ -97,6 +98,7 @@ std::unique_ptr<column> make_fixed_point_column(data_type type,
 {
   CUDF_FUNC_RANGE();
   CUDF_EXPECTS(is_fixed_point(type), "Invalid, non-fixed_point type.");
+  CUDF_EXPECTS(size >= 0, "Column size cannot be negative.");
 
   return std::make_unique<column>(type,
                                   size,
@@ -115,6 +117,7 @@ std::unique_ptr<column> make_timestamp_column(data_type type,
 {
   CUDF_FUNC_RANGE();
   CUDF_EXPECTS(is_timestamp(type), "Invalid, non-timestamp type.");
+  CUDF_EXPECTS(size >= 0, "Column size cannot be negative.");
 
   return std::make_unique<column>(type,
                                   size,
@@ -133,6 +136,7 @@ std::unique_ptr<column> make_duration_column(data_type type,
 {
   CUDF_FUNC_RANGE();
   CUDF_EXPECTS(is_duration(type), "Invalid, non-duration type.");
+  CUDF_EXPECTS(size >= 0, "Column size cannot be negative.");
 
   return std::make_unique<column>(type,
                                   size,
@@ -166,6 +170,7 @@ std::unique_ptr<column> make_dictionary_from_scalar(scalar const& s,
                                                     rmm::mr::device_memory_resource* mr)
 {
   if (size == 0) return make_empty_column(type_id::DICTIONARY32);
+  CUDF_EXPECTS(size >= 0, "Column size cannot be negative.");
   CUDF_EXPECTS(s.is_valid(stream), "cannot create a dictionary with a null key");
   return make_dictionary_column(
     make_column_from_scalar(s, 1, stream, mr),

From 4b7f5a7c35f6558d0064e1cccedccf126e24f443 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Tue, 15 Nov 2022 13:28:34 -0600
Subject: [PATCH 167/202] Safely allocate `udf_string` pointers in
 `strings_udf` (#12138)

In `strings_udf`, functions that return strings are built around C++ methods that return a `cudf::strings::udf::udf_string` object. However, due to requiring external `C` linkage, our shim functions need to work by accepting a pointer to a preallocated `udf_string` object and setting the result into the memory it points to before returning.

This piece of memory is allocated based off our `UDFString` extension class datamodel and while it is set up to be the right size, simply allocating it does not actually call the underlying `udf_string` default constructor so the memory isn't necessarily initialized in the same way a proper `udf_string` would initialize it.

This can lead to some unsafe behavior when we try and assign the result. This PR changes it so that whenever we need to allocate a `udf_string` and pass its pointer to a shim function, we first zfill that memory.

Authors:
  - https://github.com/brandon-b-miller

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Graham Markall (https://github.com/gmarkall)
  - Lawrence Mitchell (https://github.com/wence-)

URL: https://github.com/rapidsai/cudf/pull/12138
---
 python/strings_udf/cpp/src/strings/udf/shim.cu | 8 ++++----
 python/strings_udf/strings_udf/lowering.py     | 1 -
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/python/strings_udf/cpp/src/strings/udf/shim.cu b/python/strings_udf/cpp/src/strings/udf/shim.cu
index 63e740c5226..21998d59bbb 100644
--- a/python/strings_udf/cpp/src/strings/udf/shim.cu
+++ b/python/strings_udf/cpp/src/strings/udf/shim.cu
@@ -223,7 +223,7 @@ extern "C" __device__ int udf_string_from_string_view(int* nb_retbal,
                                                       void* udf_str)
 {
   auto str_view_ptr = reinterpret_cast<cudf::string_view const*>(str);
-  auto udf_str_ptr  = reinterpret_cast<udf_string*>(udf_str);
+  auto udf_str_ptr  = new (udf_str) udf_string;
   *udf_str_ptr      = udf_string(*str_view_ptr);
 
   return 0;
@@ -236,7 +236,7 @@ extern "C" __device__ int strip(int* nb_retval,
 {
   auto to_strip_ptr  = reinterpret_cast<cudf::string_view const*>(to_strip);
   auto strip_str_ptr = reinterpret_cast<cudf::string_view const*>(strip_str);
-  auto udf_str_ptr   = reinterpret_cast<udf_string*>(udf_str);
+  auto udf_str_ptr   = new (udf_str) udf_string;
 
   *udf_str_ptr = strip(*to_strip_ptr, *strip_str_ptr);
 
@@ -250,7 +250,7 @@ extern "C" __device__ int lstrip(int* nb_retval,
 {
   auto to_strip_ptr  = reinterpret_cast<cudf::string_view const*>(to_strip);
   auto strip_str_ptr = reinterpret_cast<cudf::string_view const*>(strip_str);
-  auto udf_str_ptr   = reinterpret_cast<udf_string*>(udf_str);
+  auto udf_str_ptr   = new (udf_str) udf_string;
 
   *udf_str_ptr = strip(*to_strip_ptr, *strip_str_ptr, cudf::strings::side_type::LEFT);
 
@@ -264,7 +264,7 @@ extern "C" __device__ int rstrip(int* nb_retval,
 {
   auto to_strip_ptr  = reinterpret_cast<cudf::string_view const*>(to_strip);
   auto strip_str_ptr = reinterpret_cast<cudf::string_view const*>(strip_str);
-  auto udf_str_ptr   = reinterpret_cast<udf_string*>(udf_str);
+  auto udf_str_ptr   = new (udf_str) udf_string;
 
   *udf_str_ptr = strip(*to_strip_ptr, *strip_str_ptr, cudf::strings::side_type::RIGHT);
 
diff --git a/python/strings_udf/strings_udf/lowering.py b/python/strings_udf/strings_udf/lowering.py
index 17a1869e881..a6d43ece1c5 100644
--- a/python/strings_udf/strings_udf/lowering.py
+++ b/python/strings_udf/strings_udf/lowering.py
@@ -199,7 +199,6 @@ def binary_func_impl(context, builder, sig, args):
                 udf_str_ptr = builder.alloca(
                     default_manager[udf_string].get_value_type()
                 )
-
                 _ = context.compile_internal(
                     builder,
                     cuda_func,

From 98880d28c52347111383c2efbb933f1ffcffce9f Mon Sep 17 00:00:00 2001
From: Benjamin Zaitlen <quasiben@users.noreply.github.com>
Date: Tue, 15 Nov 2022 14:30:19 -0500
Subject: [PATCH 168/202] Update cp.clip call (#12148)

In the upcoming CuPy 12, the function signature of `cp.clip` changes slightly: https://github.com/cupy/cupy/blob/6d857add3d46368705e133121cf49153039952e9/cupy/_math/misc.py#L147

This PR is still valid for CuPy 11 but will also satisfy the upcoming release

Authors:
  - Benjamin Zaitlen (https://github.com/quasiben)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Bradley Dice (https://github.com/bdice)
  - Lawrence Mitchell (https://github.com/wence-)

URL: https://github.com/rapidsai/cudf/pull/12148
---
 python/cudf/cudf/core/subword_tokenizer.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/subword_tokenizer.py b/python/cudf/cudf/core/subword_tokenizer.py
index 2c3dff7f668..4947a922cbb 100644
--- a/python/cudf/cudf/core/subword_tokenizer.py
+++ b/python/cudf/cudf/core/subword_tokenizer.py
@@ -247,7 +247,7 @@ def _bert_add_special_tokens(token_o):
     max_length = token_o["input_ids"].shape[1]
     seq_end_col = max_length - (token_o["input_ids"][:, ::-1] != 0).argmax(1)
     # clipping to take overflow into account
-    seq_end_col = cp.clip(seq_end_col + 1, a_max=max_length - 1)
+    seq_end_col = cp.clip(seq_end_col + 1, a_min=None, a_max=max_length - 1)
 
     _bert_add_special_tokens_input_ids(token_o["input_ids"], seq_end_col)
     _bert_add_special_tokens_attention_mask(
@@ -294,4 +294,6 @@ def _bert_add_special_tokens_metadata(metadata, max_length):
     # metadata seq starts from plus 1
     metadata[:, 1] = metadata[:, 1] + 1
     # clip done to take overflow into account
-    metadata[:, 2] = cp.clip(metadata[:, 2] + 1, a_max=max_length - 2)
+    metadata[:, 2] = cp.clip(
+        metadata[:, 2] + 1, a_min=None, a_max=max_length - 2
+    )

From 90f0a77df4974c2d768e99cc7cd983ab90c2ce0b Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Tue, 15 Nov 2022 15:33:29 -0500
Subject: [PATCH 169/202] Accelerate libcudf segmented sort with CUB segmented
 sort (#11969)

Moves the CUB segmented sort acceleration code logic from `cudf::lists::segmented_sorted_order` to `cudf::segmented_sorted_order` so these two functions are now aligned in behavior and performance.
This change allows `cudf::lists::segmented_sorted_order` to use `cudf::detail::segmented_sorted_order` for all cases and simplifies the implementation there. It also improves the performance of `cudf::segmented_sorted_order` since appropriate columns can now use CUB's optimized segmented sort.
No function has been changed and the existing tests are sufficient -- source has only been refactored.
Added a segmented-sort benchmark using nvbench as well.

Reference #11729

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Karthikeyan (https://github.com/karthikeyann)

URL: https://github.com/rapidsai/cudf/pull/11969
---
 cpp/benchmarks/CMakeLists.txt           |   2 +-
 cpp/benchmarks/sort/segmented_sort.cpp  |  61 ++++++
 cpp/include/cudf/utilities/traits.hpp   |  24 +++
 cpp/src/lists/segmented_sort.cu         | 276 +++---------------------
 cpp/src/sort/segmented_sort.cu          | 163 ++++++++++++++
 cpp/src/utilities/traits.cpp            |  10 +
 cpp/tests/sort/segmented_sort_tests.cpp |  18 +-
 7 files changed, 303 insertions(+), 251 deletions(-)
 create mode 100644 cpp/benchmarks/sort/segmented_sort.cpp

diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
index f35d0b0b49e..48c9ba5f185 100644
--- a/cpp/benchmarks/CMakeLists.txt
+++ b/cpp/benchmarks/CMakeLists.txt
@@ -169,7 +169,7 @@ ConfigureNVBench(SEARCH_NVBENCH search/contains.cpp)
 # ##################################################################################################
 # * sort benchmark --------------------------------------------------------------------------------
 ConfigureBench(SORT_BENCH sort/rank.cpp sort/sort.cpp sort/sort_strings.cpp)
-ConfigureNVBench(SORT_NVBENCH sort/sort_lists.cpp sort/sort_structs.cpp)
+ConfigureNVBench(SORT_NVBENCH sort/segmented_sort.cpp sort/sort_lists.cpp sort/sort_structs.cpp)
 
 # ##################################################################################################
 # * quantiles benchmark
diff --git a/cpp/benchmarks/sort/segmented_sort.cpp b/cpp/benchmarks/sort/segmented_sort.cpp
new file mode 100644
index 00000000000..7162269853c
--- /dev/null
+++ b/cpp/benchmarks/sort/segmented_sort.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <benchmarks/common/generate_input.hpp>
+#include <benchmarks/fixture/rmm_pool_raii.hpp>
+
+#include <cudf/filling.hpp>
+#include <cudf/scalar/scalar.hpp>
+#include <cudf/sorting.hpp>
+#include <cudf/utilities/default_stream.hpp>
+
+#include <nvbench/nvbench.cuh>
+
+void nvbench_segmented_sort(nvbench::state& state)
+{
+  cudf::rmm_pool_raii pool_raii;
+
+  auto const dtype      = cudf::type_to_id<int32_t>();
+  auto const size_bytes = static_cast<size_t>(state.get_int64("size_bytes"));
+  auto const null_freq  = state.get_float64("null_frequency");
+  auto const row_width  = static_cast<cudf::size_type>(state.get_int64("row_width"));
+
+  data_profile const table_profile =
+    data_profile_builder().null_probability(null_freq).distribution(
+      dtype, distribution_id::UNIFORM, 0, 10);
+  auto const input =
+    create_random_table({cudf::type_id::INT32}, table_size_bytes{size_bytes}, table_profile);
+  auto const rows = input->num_rows();
+
+  auto const segments = cudf::sequence((rows / row_width) + 1,
+                                       cudf::numeric_scalar<int32_t>(0),
+                                       cudf::numeric_scalar<int32_t>(row_width));
+
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
+  state.add_element_count(size_bytes, "bytes");
+  state.add_global_memory_reads<nvbench::int32_t>(rows * row_width);
+  state.add_global_memory_writes<nvbench::int32_t>(rows);
+
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+    auto result = cudf::segmented_sorted_order(*input, *segments);
+  });
+}
+
+NVBENCH_BENCH(nvbench_segmented_sort)
+  .set_name("segmented_sort")
+  .add_int64_power_of_two_axis("size_bytes", {16, 18, 20, 22, 24, 28})
+  .add_float64_axis("null_frequency", {0, 0.1})
+  .add_int64_axis("row_width", {16, 128, 1024});
diff --git a/cpp/include/cudf/utilities/traits.hpp b/cpp/include/cudf/utilities/traits.hpp
index d95ea42a039..43d43ba6bb3 100644
--- a/cpp/include/cudf/utilities/traits.hpp
+++ b/cpp/include/cudf/utilities/traits.hpp
@@ -261,6 +261,30 @@ constexpr inline bool is_signed_iterator()
   return std::is_signed_v<typename std::iterator_traits<Iterator>::value_type>;
 }
 
+/**
+ * @brief Indicates whether the type `T` is an integral type.
+ *
+ * @tparam T  The type to verify
+ * @return true `T` is integral
+ * @return false  `T` is not integral
+ */
+template <typename T>
+constexpr inline bool is_integral()
+{
+  return cuda::std::is_integral_v<T>;
+}
+
+/**
+ * @brief Indicates whether `type` is an integral `data_type`.
+ *
+ * "Integral" types are fundamental integer types such as `INT*` and `UINT*`.
+ *
+ * @param type The `data_type` to verify
+ * @return true `type` is integral
+ * @return false `type` is not integral
+ */
+bool is_integral(data_type type);
+
 /**
  * @brief Indicates whether the type `T` is a floating point type.
  *
diff --git a/cpp/src/lists/segmented_sort.cu b/cpp/src/lists/segmented_sort.cu
index 1a4e3ea66ed..260636a61cf 100644
--- a/cpp/src/lists/segmented_sort.cu
+++ b/cpp/src/lists/segmented_sort.cu
@@ -15,12 +15,8 @@
  */
 
 #include <cudf/column/column.hpp>
-#include <cudf/column/column_device_view.cuh>
 #include <cudf/column/column_factories.hpp>
-#include <cudf/copying.hpp>
 #include <cudf/detail/copy.hpp>
-#include <cudf/detail/gather.cuh>
-#include <cudf/detail/iterator.cuh>
 #include <cudf/detail/null_mask.hpp>
 #include <cudf/detail/nvtx/ranges.hpp>
 #include <cudf/detail/sorting.hpp>
@@ -28,231 +24,27 @@
 #include <cudf/lists/sorting.hpp>
 #include <cudf/utilities/default_stream.hpp>
 #include <cudf/utilities/error.hpp>
-#include <cudf/utilities/type_dispatcher.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
-#include <rmm/device_buffer.hpp>
-#include <rmm/device_uvector.hpp>
 #include <rmm/exec_policy.hpp>
 
-#include <thrust/copy.h>
-#include <thrust/sequence.h>
 #include <thrust/transform.h>
 
-#include <cub/device/device_segmented_radix_sort.cuh>
-
 namespace cudf {
 namespace lists {
 namespace detail {
 
-struct SegmentedSortColumn {
-  /**
-   * @brief Compile time check for allowing radix sort for column type.
-   *
-   * Floating point is not included here because of the special handling of NaNs.
-   */
-  template <typename T>
-  static constexpr bool is_radix_sort_supported()
-  {
-    return std::is_integral<T>();
-  }
-
-  template <typename KeyT, typename ValueT, typename OffsetIteratorT>
-  void SortPairsAscending(KeyT const* keys_in,
-                          KeyT* keys_out,
-                          ValueT const* values_in,
-                          ValueT* values_out,
-                          int num_items,
-                          int num_segments,
-                          OffsetIteratorT begin_offsets,
-                          OffsetIteratorT end_offsets,
-                          rmm::cuda_stream_view stream)
-  {
-    rmm::device_buffer d_temp_storage;
-    size_t temp_storage_bytes = 0;
-    cub::DeviceSegmentedRadixSort::SortPairs(d_temp_storage.data(),
-                                             temp_storage_bytes,
-                                             keys_in,
-                                             keys_out,
-                                             values_in,
-                                             values_out,
-                                             num_items,
-                                             num_segments,
-                                             begin_offsets,
-                                             end_offsets,
-                                             0,
-                                             sizeof(KeyT) * 8,
-                                             stream.value());
-    d_temp_storage = rmm::device_buffer{temp_storage_bytes, stream};
-
-    cub::DeviceSegmentedRadixSort::SortPairs(d_temp_storage.data(),
-                                             temp_storage_bytes,
-                                             keys_in,
-                                             keys_out,
-                                             values_in,
-                                             values_out,
-                                             num_items,
-                                             num_segments,
-                                             begin_offsets,
-                                             end_offsets,
-                                             0,
-                                             sizeof(KeyT) * 8,
-                                             stream.value());
-  }
-
-  template <typename KeyT, typename ValueT, typename OffsetIteratorT>
-  void SortPairsDescending(KeyT const* keys_in,
-                           KeyT* keys_out,
-                           ValueT const* values_in,
-                           ValueT* values_out,
-                           int num_items,
-                           int num_segments,
-                           OffsetIteratorT begin_offsets,
-                           OffsetIteratorT end_offsets,
-                           rmm::cuda_stream_view stream)
-  {
-    rmm::device_buffer d_temp_storage;
-    size_t temp_storage_bytes = 0;
-    cub::DeviceSegmentedRadixSort::SortPairsDescending(d_temp_storage.data(),
-                                                       temp_storage_bytes,
-                                                       keys_in,
-                                                       keys_out,
-                                                       values_in,
-                                                       values_out,
-                                                       num_items,
-                                                       num_segments,
-                                                       begin_offsets,
-                                                       end_offsets,
-                                                       0,
-                                                       sizeof(KeyT) * 8,
-                                                       stream.value());
-    d_temp_storage = rmm::device_buffer{temp_storage_bytes, stream};
-
-    cub::DeviceSegmentedRadixSort::SortPairsDescending(d_temp_storage.data(),
-                                                       temp_storage_bytes,
-                                                       keys_in,
-                                                       keys_out,
-                                                       values_in,
-                                                       values_out,
-                                                       num_items,
-                                                       num_segments,
-                                                       begin_offsets,
-                                                       end_offsets,
-                                                       0,
-                                                       sizeof(KeyT) * 8,
-                                                       stream.value());
-  }
-
-  template <typename T>
-  std::enable_if_t<not is_radix_sort_supported<T>(), std::unique_ptr<column>> operator()(
-    column_view const& child,
-    column_view const& segment_offsets,
-    order column_order,
-    null_order null_precedence,
-    rmm::cuda_stream_view stream,
-    rmm::mr::device_memory_resource* mr)
-  {
-    auto child_table = segmented_sort_by_key(table_view{{child}},
-                                             table_view{{child}},
-                                             segment_offsets,
-                                             {column_order},
-                                             {null_precedence},
-                                             stream,
-                                             mr);
-    return std::move(child_table->release().front());
-  }
-
-  template <typename T>
-  std::enable_if_t<is_radix_sort_supported<T>(), std::unique_ptr<column>> operator()(
-    column_view const& child,
-    column_view const& offsets,
-    order column_order,
-    null_order null_precedence,
-    rmm::cuda_stream_view stream,
-    rmm::mr::device_memory_resource* mr)
-  {
-    // the average list size at which to prefer radixsort:
-    constexpr cudf::size_type MIN_AVG_LIST_SIZE_FOR_RADIXSORT{100};
-
-    if ((child.size() / offsets.size()) < MIN_AVG_LIST_SIZE_FOR_RADIXSORT) {
-      auto child_table = segmented_sort_by_key(table_view{{child}},
-                                               table_view{{child}},
-                                               offsets,
-                                               {column_order},
-                                               {null_precedence},
-                                               stream,
-                                               mr);
-      return std::move(child_table->release().front());
-    }
-
-    auto output =
-      cudf::detail::allocate_like(child, child.size(), mask_allocation_policy::NEVER, stream, mr);
-    mutable_column_view mutable_output_view = output->mutable_view();
-
-    auto keys = [&]() {
-      if (child.nullable()) {
-        rmm::device_uvector<T> keys(child.size(), stream);
-        auto const null_replace_T = null_precedence == null_order::AFTER
-                                      ? std::numeric_limits<T>::max()
-                                      : std::numeric_limits<T>::min();
-
-        auto device_child = column_device_view::create(child, stream);
-        auto keys_in =
-          cudf::detail::make_null_replacement_iterator<T>(*device_child, null_replace_T);
-        thrust::copy_n(rmm::exec_policy(stream), keys_in, child.size(), keys.begin());
-        return keys;
-      }
-      return rmm::device_uvector<T>{0, stream};
-    }();
+namespace {
 
-    std::unique_ptr<column> sorted_indices = cudf::make_numeric_column(
-      data_type(type_to_id<size_type>()), child.size(), mask_state::UNALLOCATED, stream, mr);
-    mutable_column_view mutable_indices_view = sorted_indices->mutable_view();
-    thrust::sequence(rmm::exec_policy(stream),
-                     mutable_indices_view.begin<size_type>(),
-                     mutable_indices_view.end<size_type>(),
-                     0);
-
-    if (column_order == order::ASCENDING)
-      SortPairsAscending(child.nullable() ? keys.data() : child.begin<T>(),
-                         mutable_output_view.begin<T>(),
-                         mutable_indices_view.begin<size_type>(),
-                         mutable_indices_view.begin<size_type>(),
-                         child.size(),
-                         offsets.size() - 1,
-                         offsets.begin<size_type>(),
-                         offsets.begin<size_type>() + 1,
-                         stream);
-    else
-      SortPairsDescending(child.nullable() ? keys.data() : child.begin<T>(),
-                          mutable_output_view.begin<T>(),
-                          mutable_indices_view.begin<size_type>(),
-                          mutable_indices_view.begin<size_type>(),
-                          child.size(),
-                          offsets.size() - 1,
-                          offsets.begin<size_type>(),
-                          offsets.begin<size_type>() + 1,
-                          stream);
-    std::vector<std::unique_ptr<column>> output_cols;
-    output_cols.push_back(std::move(output));
-    // rearrange the null_mask.
-    cudf::detail::gather_bitmask(cudf::table_view{{child}},
-                                 mutable_indices_view.begin<size_type>(),
-                                 output_cols,
-                                 cudf::detail::gather_bitmask_op::DONT_CHECK,
-                                 stream,
-                                 mr);
-    return std::move(output_cols.front());
-  }
-};
-
-std::unique_ptr<column> sort_lists(lists_column_view const& input,
-                                   order column_order,
-                                   null_order null_precedence,
-                                   rmm::cuda_stream_view stream,
-                                   rmm::mr::device_memory_resource* mr)
+/**
+ * @brief Create output offsets for segmented sort
+ *
+ * This creates a normalized set of offsets from the offsets child column of the input.
+ */
+std::unique_ptr<column> build_output_offsets(lists_column_view const& input,
+                                             rmm::cuda_stream_view stream,
+                                             rmm::mr::device_memory_resource* mr)
 {
-  if (input.is_empty()) return empty_like(input.parent());
   auto output_offset = make_numeric_column(
     input.offsets().type(), input.size() + 1, mask_state::UNALLOCATED, stream, mr);
   thrust::transform(rmm::exec_policy(stream),
@@ -262,25 +54,35 @@ std::unique_ptr<column> sort_lists(lists_column_view const& input,
                     [first = input.offsets_begin()] __device__(auto offset_index) {
                       return offset_index - *first;
                     });
-  // for numeric columns, calls Faster segmented radix sort path
-  // for non-numeric columns, calls segmented_sort_by_key.
-  auto output_child = type_dispatcher<dispatch_storage_type>(input.child().type(),
-                                                             SegmentedSortColumn{},
-                                                             input.get_sliced_child(stream),
-                                                             output_offset->view(),
-                                                             column_order,
-                                                             null_precedence,
-                                                             stream,
-                                                             mr);
+  return output_offset;
+}
+
+}  // namespace
+
+std::unique_ptr<column> sort_lists(lists_column_view const& input,
+                                   order column_order,
+                                   null_order null_precedence,
+                                   rmm::cuda_stream_view stream,
+                                   rmm::mr::device_memory_resource* mr)
+{
+  if (input.is_empty()) return empty_like(input.parent());
+
+  auto output_offset = build_output_offsets(input, stream, mr);
+  auto const child   = input.get_sliced_child(stream);
 
-  auto null_mask = cudf::detail::copy_bitmask(input.parent(), stream, mr);
+  auto const sorted_child_table = segmented_sort_by_key(table_view{{child}},
+                                                        table_view{{child}},
+                                                        output_offset->view(),
+                                                        {column_order},
+                                                        {null_precedence},
+                                                        stream,
+                                                        mr);
 
-  // Assemble list column & return
   return make_lists_column(input.size(),
                            std::move(output_offset),
-                           std::move(output_child),
+                           std::move(sorted_child_table->release().front()),
                            input.null_count(),
-                           std::move(null_mask),
+                           cudf::detail::copy_bitmask(input.parent(), stream, mr),
                            stream,
                            mr);
 }
@@ -293,17 +95,9 @@ std::unique_ptr<column> stable_sort_lists(lists_column_view const& input,
 {
   if (input.is_empty()) { return empty_like(input.parent()); }
 
-  auto output_offset = make_numeric_column(
-    input.offsets().type(), input.size() + 1, mask_state::UNALLOCATED, stream, mr);
-  thrust::transform(rmm::exec_policy(stream),
-                    input.offsets_begin(),
-                    input.offsets_end(),
-                    output_offset->mutable_view().template begin<size_type>(),
-                    [first = input.offsets_begin()] __device__(auto offset_index) {
-                      return offset_index - *first;
-                    });
+  auto output_offset = build_output_offsets(input, stream, mr);
+  auto const child   = input.get_sliced_child(stream);
 
-  auto const child              = input.get_sliced_child(stream);
   auto const sorted_child_table = stable_segmented_sort_by_key(table_view{{child}},
                                                                table_view{{child}},
                                                                output_offset->view(),
diff --git a/cpp/src/sort/segmented_sort.cu b/cpp/src/sort/segmented_sort.cu
index 20017eda402..685d8aa3ec1 100644
--- a/cpp/src/sort/segmented_sort.cu
+++ b/cpp/src/sort/segmented_sort.cu
@@ -14,8 +14,11 @@
  * limitations under the License.
  */
 
+#include <cudf/column/column_factories.hpp>
+#include <cudf/detail/copy.hpp>
 #include <cudf/detail/gather.hpp>
 #include <cudf/detail/nvtx/ranges.hpp>
+#include <cudf/detail/sequence.hpp>
 #include <cudf/detail/sorting.hpp>
 #include <cudf/utilities/default_stream.hpp>
 
@@ -25,6 +28,8 @@
 #include <thrust/binary_search.h>
 #include <thrust/iterator/counting_iterator.h>
 
+#include <cub/device/device_segmented_sort.cuh>
+
 namespace cudf {
 namespace detail {
 
@@ -34,6 +39,130 @@ namespace {
  */
 enum class sort_method { STABLE, UNSTABLE };
 
+/**
+ * @brief Functor performs faster segmented sort on eligible columns
+ */
+struct column_fast_sort_fn {
+  /**
+   * @brief Run-time check for faster segmented sort on an eligible column
+   *
+   * Fast segmented sort can handle integral types including
+   * decimal types if dispatch_storage_type is used but it does not support int128.
+   */
+  static bool is_fast_sort_supported(column_view const& col)
+  {
+    return !col.has_nulls() and
+           (cudf::is_integral(col.type()) ||
+            (cudf::is_fixed_point(col.type()) and (col.type().id() != type_id::DECIMAL128)));
+  }
+
+  /**
+   * @brief Compile-time check for supporting fast segmented sort for a specific type
+   *
+   * The dispatch_storage_type means we can check for integral types to
+   * include fixed-point types but the CUB limitation means we need to exclude int128.
+   */
+  template <typename T>
+  static constexpr bool is_fast_sort_supported()
+  {
+    return cudf::is_integral<T>() and !std::is_same_v<__int128, T>;
+  }
+
+  template <typename T>
+  void fast_sort(column_view const& input,
+                 column_view const& segment_offsets,
+                 mutable_column_view& indices,
+                 bool ascending,
+                 rmm::cuda_stream_view stream)
+  {
+    // CUB's segmented sort functions cannot accept iterators.
+    // We create a temporary column here for it to use.
+    auto temp_col =
+      cudf::detail::allocate_like(input, input.size(), mask_allocation_policy::NEVER, stream);
+    mutable_column_view output_view = temp_col->mutable_view();
+
+    // DeviceSegmentedSort is faster than DeviceSegmentedRadixSort at this time
+    auto fast_sort_impl = [stream](bool ascending, [[maybe_unused]] auto&&... args) {
+      rmm::device_buffer d_temp_storage;
+      size_t temp_storage_bytes = 0;
+      if (ascending) {
+        cub::DeviceSegmentedSort::SortPairs(
+          d_temp_storage.data(), temp_storage_bytes, std::forward<decltype(args)>(args)...);
+        d_temp_storage = rmm::device_buffer{temp_storage_bytes, stream};
+        cub::DeviceSegmentedSort::SortPairs(
+          d_temp_storage.data(), temp_storage_bytes, std::forward<decltype(args)>(args)...);
+      } else {
+        cub::DeviceSegmentedSort::SortPairsDescending(
+          d_temp_storage.data(), temp_storage_bytes, std::forward<decltype(args)>(args)...);
+        d_temp_storage = rmm::device_buffer{temp_storage_bytes, stream};
+        cub::DeviceSegmentedSort::SortPairsDescending(
+          d_temp_storage.data(), temp_storage_bytes, std::forward<decltype(args)>(args)...);
+      }
+    };
+
+    fast_sort_impl(ascending,
+                   input.begin<T>(),
+                   output_view.begin<T>(),
+                   indices.begin<size_type>(),
+                   indices.begin<size_type>(),
+                   input.size(),
+                   segment_offsets.size() - 1,
+                   segment_offsets.begin<size_type>(),
+                   segment_offsets.begin<size_type>() + 1,
+                   stream.value());
+  }
+
+  template <typename T, CUDF_ENABLE_IF(is_fast_sort_supported<T>())>
+  void operator()(column_view const& input,
+                  column_view const& segment_offsets,
+                  mutable_column_view& indices,
+                  bool ascending,
+                  rmm::cuda_stream_view stream)
+  {
+    fast_sort<T>(input, segment_offsets, indices, ascending, stream);
+  }
+
+  template <typename T, CUDF_ENABLE_IF(!is_fast_sort_supported<T>())>
+  void operator()(
+    column_view const&, column_view const&, mutable_column_view&, bool, rmm::cuda_stream_view)
+  {
+    CUDF_FAIL("Column type cannot be used with fast-sort function");
+  }
+};
+
+/**
+ * @brief Performs faster sort on eligible columns
+ *
+ * `is_fast_sort_supported()` must return true for the input column before calling this function.
+ *
+ * @param input Column to sort
+ * @param segment_offsets Identifies segments to sort within
+ * @param column_order Sort ascending or descending
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ */
+std::unique_ptr<column> fast_segmented_sorted_order(column_view const& input,
+                                                    column_view const& segment_offsets,
+                                                    order const& column_order,
+                                                    rmm::cuda_stream_view stream,
+                                                    rmm::mr::device_memory_resource* mr)
+{
+  // Unfortunately, CUB's segmented sort functions cannot accept iterators.
+  // We have to build a pre-filled sequence of indices as input.
+  auto sorted_indices =
+    cudf::detail::sequence(input.size(), numeric_scalar<size_type>{0}, stream, mr);
+  auto indices_view = sorted_indices->mutable_view();
+
+  cudf::type_dispatcher<dispatch_storage_type>(input.type(),
+                                               column_fast_sort_fn{},
+                                               input,
+                                               segment_offsets,
+                                               indices_view,
+                                               column_order == order::ASCENDING,
+                                               stream);
+  return sorted_indices;
+}
+
 /**
  * @brief Builds indices to identify segments to sort
  *
@@ -89,8 +218,42 @@ std::unique_ptr<column> segmented_sorted_order_common(
   rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr)
 {
+  if (keys.num_rows() == 0 || keys.num_columns() == 0) {
+    return cudf::make_empty_column(type_to_id<size_type>());
+  }
+
   CUDF_EXPECTS(segment_offsets.type() == data_type(type_to_id<size_type>()),
                "segment offsets should be size_type");
+
+  if (not column_order.empty()) {
+    CUDF_EXPECTS(static_cast<std::size_t>(keys.num_columns()) == column_order.size(),
+                 "Mismatch between number of columns and column order.");
+  }
+
+  if (not null_precedence.empty()) {
+    CUDF_EXPECTS(static_cast<std::size_t>(keys.num_columns()) == null_precedence.size(),
+                 "Mismatch between number of columns and null_precedence size.");
+  }
+
+  // the maximum average segment size (rows per segment) for which to prefer fast sort
+  constexpr cudf::size_type MAX_AVG_LIST_SIZE_FOR_FAST_SORT{100};
+  // the maximum row count for which to prefer fast sort
+  constexpr cudf::size_type MAX_LIST_SIZE_FOR_FAST_SORT{1 << 18};
+
+  // fast-path for single column sort:
+  // - single-column table
+  // - not stable-sort
+  // - no nulls and allowable fixed-width type
+  // - size and width are limited -- based on benchmark results
+  if (keys.num_columns() == 1 and sorting == sort_method::UNSTABLE and
+      column_fast_sort_fn::is_fast_sort_supported(keys.column(0)) and
+      (segment_offsets.size() > 0) and
+      (((keys.num_rows() / segment_offsets.size()) < MAX_AVG_LIST_SIZE_FOR_FAST_SORT) or
+       (keys.num_rows() < MAX_LIST_SIZE_FOR_FAST_SORT))) {
+    auto const col_order = column_order.empty() ? order::ASCENDING : column_order.front();
+    return fast_segmented_sorted_order(keys.column(0), segment_offsets, col_order, stream, mr);
+  }
+
   // Get segment id of each element in all segments.
   auto segment_ids = get_segment_indices(keys.num_rows(), segment_offsets, stream);
 
diff --git a/cpp/src/utilities/traits.cpp b/cpp/src/utilities/traits.cpp
index 14493a45cd7..bc10dd7845a 100644
--- a/cpp/src/utilities/traits.cpp
+++ b/cpp/src/utilities/traits.cpp
@@ -148,6 +148,16 @@ struct is_unsigned_impl {
  */
 bool is_unsigned(data_type type) { return cudf::type_dispatcher(type, is_unsigned_impl{}); }
 
+struct is_integral_impl {
+  template <typename T>
+  constexpr bool operator()()
+  {
+    return is_integral<T>();
+  }
+};
+
+bool is_integral(data_type type) { return cudf::type_dispatcher(type, is_integral_impl{}); }
+
 struct is_floating_point_impl {
   template <typename T>
   constexpr bool operator()()
diff --git a/cpp/tests/sort/segmented_sort_tests.cpp b/cpp/tests/sort/segmented_sort_tests.cpp
index 09007df38ce..c1a742e63b8 100644
--- a/cpp/tests/sort/segmented_sort_tests.cpp
+++ b/cpp/tests/sort/segmented_sort_tests.cpp
@@ -97,7 +97,7 @@ TEST_F(SegmentedSortInt, Single)
   column_wrapper<int> segments2{{0, 3}};
   table_view table_1elem{{col1}};
   table_view table_1segm{{col3}};
-  CUDF_EXPECT_NO_THROW(cudf::segmented_sort_by_key(table_1elem, table_1elem, segments2));
+
   CUDF_EXPECT_NO_THROW(cudf::segmented_sort_by_key(table_1elem, table_1elem, segments1));
   CUDF_EXPECT_NO_THROW(cudf::segmented_sort_by_key(table_1segm, table_1segm, segments2));
   CUDF_EXPECT_NO_THROW(cudf::segmented_sort_by_key(table_1segm, table_1segm, segments1));
@@ -251,27 +251,27 @@ TEST_F(SegmentedSortInt, Sliced)
 TEST_F(SegmentedSortInt, ErrorsMismatchArgSizes)
 {
   using T = int;
-  column_wrapper<T> col1{{1, 2, 3, 4}};
-  column_wrapper<T> col2{{5, 6, 7, 8, 9}};
+  column_wrapper<T> col1{{5, 6, 7, 8, 9}};
+  column_wrapper<T> segments{{1, 2, 3, 4}};
   table_view input1{{col1}};
 
   // Mismatch order sizes
   EXPECT_THROW(
-    cudf::segmented_sort_by_key(input1, input1, col2, {order::ASCENDING, order::ASCENDING}, {}),
+    cudf::segmented_sort_by_key(input1, input1, segments, {order::ASCENDING, order::ASCENDING}, {}),
     logic_error);
   // Mismatch null precedence sizes
-  EXPECT_THROW(
-    cudf::segmented_sort_by_key(input1, input1, col2, {}, {null_order::AFTER, null_order::AFTER}),
-    logic_error);
+  EXPECT_THROW(cudf::segmented_sort_by_key(
+                 input1, input1, segments, {}, {null_order::AFTER, null_order::AFTER}),
+               logic_error);
   // Both
   EXPECT_THROW(cudf::segmented_sort_by_key(input1,
                                            input1,
-                                           col2,
+                                           segments,
                                            {order::ASCENDING, order::ASCENDING},
                                            {null_order::AFTER, null_order::AFTER}),
                logic_error);
   // segmented_offsets beyond num_rows - undefined behavior, no throw.
-  CUDF_EXPECT_NO_THROW(cudf::segmented_sort_by_key(input1, input1, col2));
+  CUDF_EXPECT_NO_THROW(cudf::segmented_sort_by_key(input1, input1, segments));
 }
 
 }  // namespace test

From 414140b7b7b22d55d4090f547df59db3e7799428 Mon Sep 17 00:00:00 2001
From: vuule <vmilovanovic@nvidia.com>
Date: Tue, 15 Nov 2022 18:05:33 -0800
Subject: [PATCH 170/202] check number of rows on empty data

---
 cpp/src/io/orc/reader_impl.cu | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu
index 7a135c1f2f2..cc686edeeef 100644
--- a/cpp/src/io/orc/reader_impl.cu
+++ b/cpp/src/io/orc/reader_impl.cu
@@ -991,7 +991,6 @@ table_with_metadata reader::impl::read(size_type skip_rows,
 
       // Map each ORC column to its column
       _col_meta.orc_col_map[level][col.id] = column_types.size() - 1;
-      // TODO: Once MAP type is supported in cuDF, update this for MAP as well
       if (col_type == type_id::LIST or col_type == type_id::STRUCT) nested_col.emplace_back(col);
     }
 
@@ -1072,11 +1071,20 @@ table_with_metadata reader::impl::read(size_type skip_rows,
 
           if (total_data_size == 0) {
             CUDF_EXPECTS(stripe_info->indexLength == 0, "Invalid index rowgroup stream data");
+
+            auto const are_all_empty =
+              std::all_of(_col_meta.num_child_rows.begin(),
+                          _col_meta.num_child_rows.end(),
+                          [](auto col_num_rows) { return col_num_rows == 0; });
+
+            auto const are_all_structs =
+              std::all_of(column_types.begin(), column_types.end(), [](auto dtype) {
+                return dtype.id() == type_id::STRUCT;
+              });
+
             // In case ROW GROUP INDEX is not present and all columns are structs with no null
             // stream, there is nothing to read at this level.
-            auto fn_check_dtype = [](auto dtype) { return dtype.id() == type_id::STRUCT; };
-            CUDF_EXPECTS(std::all_of(column_types.begin(), column_types.end(), fn_check_dtype),
-                         "Expected streams data within stripe");
+            CUDF_EXPECTS(are_all_empty or are_all_structs, "Expected streams data within stripe");
             is_data_empty = true;
           }
 

From c574ddf2a8669375be1f459b686ff28b32176f9e Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Tue, 15 Nov 2022 20:40:10 -0600
Subject: [PATCH 171/202] Fix decimal binary operations (#12142)

Fixes: #11636
This PR:

- [x] Fixes an `UnboundLocalError` error.
- [x] Fixes reflected binary operations and adds tests for them.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/12142
---
 python/cudf/cudf/_lib/binaryop.pyx      |   5 +-
 python/cudf/cudf/core/column/decimal.py |  18 ++--
 python/cudf/cudf/tests/test_binops.py   | 108 +++++++++++++++++++++++-
 3 files changed, 115 insertions(+), 16 deletions(-)

diff --git a/python/cudf/cudf/_lib/binaryop.pyx b/python/cudf/cudf/_lib/binaryop.pyx
index 995fdc7e315..f2fec24b05a 100644
--- a/python/cudf/cudf/_lib/binaryop.pyx
+++ b/python/cudf/cudf/_lib/binaryop.pyx
@@ -21,7 +21,7 @@ from cudf._lib.cpp.scalar.scalar cimport scalar
 from cudf._lib.cpp.types cimport data_type, type_id
 from cudf._lib.types cimport dtype_to_data_type, underlying_type_t_type_id
 
-from cudf.api.types import is_scalar, is_string_dtype
+from cudf.api.types import is_scalar
 
 cimport cudf._lib.cpp.binaryop as cpp_binaryop
 from cudf._lib.cpp.binaryop cimport binary_operator
@@ -173,7 +173,6 @@ def binaryop(lhs, rhs, op, dtype):
     cdef data_type c_dtype = dtype_to_data_type(dtype)
 
     if is_scalar(lhs) or lhs is None:
-        is_string_col = is_string_dtype(rhs.dtype)
         s_lhs = as_device_scalar(lhs, dtype=rhs.dtype if lhs is None else None)
         result = binaryop_s_v(
             s_lhs,
@@ -183,7 +182,6 @@ def binaryop(lhs, rhs, op, dtype):
         )
 
     elif is_scalar(rhs) or rhs is None:
-        is_string_col = is_string_dtype(lhs.dtype)
         s_rhs = as_device_scalar(rhs, dtype=lhs.dtype if rhs is None else None)
         result = binaryop_v_s(
             lhs,
@@ -193,7 +191,6 @@ def binaryop(lhs, rhs, op, dtype):
         )
 
     else:
-        is_string_col = is_string_dtype(lhs.dtype)
         result = binaryop_v_v(
             lhs,
             rhs,
diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py
index 5ee9024a0d8..157bc1f4291 100644
--- a/python/cudf/cudf/core/column/decimal.py
+++ b/python/cudf/cudf/core/column/decimal.py
@@ -79,12 +79,14 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str):
 
         # Binary Arithmetics between decimal columns. `Scale` and `precision`
         # are computed outside of libcudf
+        unsupported_msg = (
+            f"{op} not supported for the following dtypes: "
+            f"{self.dtype}, {other.dtype}"
+        )
         try:
             if op in {"__add__", "__sub__", "__mul__", "__div__"}:
-                output_type = _get_decimal_type(self.dtype, other.dtype, op)
-                result = libcudf.binaryop.binaryop(
-                    self, other, op, output_type
-                )
+                output_type = _get_decimal_type(lhs.dtype, rhs.dtype, op)
+                result = libcudf.binaryop.binaryop(lhs, rhs, op, output_type)
                 # TODO:  Why is this necessary? Why isn't the result's
                 # precision already set correctly based on output_type?
                 result.dtype.precision = output_type.precision
@@ -96,12 +98,12 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str):
                 "__le__",
                 "__ge__",
             }:
-                result = libcudf.binaryop.binaryop(self, other, op, bool)
+                result = libcudf.binaryop.binaryop(lhs, rhs, op, bool)
+            else:
+                raise TypeError(unsupported_msg)
         except RuntimeError as e:
             if "Unsupported operator for these types" in str(e):
-                raise NotImplementedError(
-                    f"{op} not supported for types with different bit-widths"
-                ) from e
+                raise TypeError(unsupported_msg) from e
             raise
 
         return result
diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py
index 2229bcc1938..c8b8dfa1e60 100644
--- a/python/cudf/cudf/tests/test_binops.py
+++ b/python/cudf/cudf/tests/test_binops.py
@@ -1775,7 +1775,7 @@ def test_binops_with_NA_consistent(dtype, op):
 
 
 @pytest.mark.parametrize(
-    "args",
+    "op, lhs, l_dtype, rhs, r_dtype, expect, expect_dtype",
     [
         (
             operator.add,
@@ -1786,6 +1786,15 @@ def test_binops_with_NA_consistent(dtype, op):
             ["3.0", "4.0"],
             cudf.Decimal64Dtype(scale=2, precision=4),
         ),
+        (
+            operator.add,
+            2,
+            cudf.Decimal64Dtype(scale=2, precision=3),
+            ["1.5", "2.0"],
+            cudf.Decimal64Dtype(scale=2, precision=3),
+            ["3.5", "4.0"],
+            cudf.Decimal64Dtype(scale=2, precision=4),
+        ),
         (
             operator.add,
             ["1.5", "2.0"],
@@ -1831,6 +1840,15 @@ def test_binops_with_NA_consistent(dtype, op):
             ["99.9", "199.8"],
             cudf.Decimal128Dtype(scale=6, precision=19),
         ),
+        (
+            operator.sub,
+            2,
+            cudf.Decimal64Dtype(scale=3, precision=4),
+            ["2.25", "1.005"],
+            cudf.Decimal64Dtype(scale=3, precision=4),
+            ["-0.25", "0.995"],
+            cudf.Decimal64Dtype(scale=3, precision=5),
+        ),
         (
             operator.mul,
             ["1.5", "2.0"],
@@ -1858,6 +1876,15 @@ def test_binops_with_NA_consistent(dtype, op):
             ["343.0", "1000.0"],
             cudf.Decimal64Dtype(scale=0, precision=8),
         ),
+        (
+            operator.mul,
+            200,
+            cudf.Decimal64Dtype(scale=3, precision=6),
+            ["0.343", "0.500"],
+            cudf.Decimal64Dtype(scale=3, precision=6),
+            ["68.60", "100.0"],
+            cudf.Decimal64Dtype(scale=6, precision=13),
+        ),
         (
             operator.truediv,
             ["1.5", "2.0"],
@@ -1885,6 +1912,15 @@ def test_binops_with_NA_consistent(dtype, op):
             ["56.77", "1.79"],
             cudf.Decimal128Dtype(scale=13, precision=25),
         ),
+        (
+            operator.truediv,
+            20,
+            cudf.Decimal128Dtype(scale=2, precision=6),
+            ["20", "20"],
+            cudf.Decimal128Dtype(scale=2, precision=6),
+            ["1.0", "1.0"],
+            cudf.Decimal128Dtype(scale=9, precision=15),
+        ),
         (
             operator.add,
             ["1.5", None, "2.0"],
@@ -2103,10 +2139,12 @@ def test_binops_with_NA_consistent(dtype, op):
         ),
     ],
 )
-def test_binops_decimal(args):
-    op, lhs, l_dtype, rhs, r_dtype, expect, expect_dtype = args
+def test_binops_decimal(op, lhs, l_dtype, rhs, r_dtype, expect, expect_dtype):
 
-    a = utils._decimal_series(lhs, l_dtype)
+    if isinstance(lhs, (int, float)):
+        a = cudf.Scalar(lhs, l_dtype)
+    else:
+        a = utils._decimal_series(lhs, l_dtype)
     b = utils._decimal_series(rhs, r_dtype)
     expect = (
         utils._decimal_series(expect, expect_dtype)
@@ -2122,6 +2160,68 @@ def test_binops_decimal(args):
     utils.assert_eq(expect, got)
 
 
+@pytest.mark.parametrize(
+    "op,lhs,l_dtype,rhs,r_dtype,expect,expect_dtype",
+    [
+        (
+            "radd",
+            ["1.5", "2.0"],
+            cudf.Decimal64Dtype(scale=2, precision=3),
+            ["1.5", "2.0"],
+            cudf.Decimal64Dtype(scale=2, precision=3),
+            ["3.0", "4.0"],
+            cudf.Decimal64Dtype(scale=2, precision=4),
+        ),
+        (
+            "rsub",
+            ["100", "200"],
+            cudf.Decimal64Dtype(scale=-2, precision=10),
+            ["0.1", "0.2"],
+            cudf.Decimal64Dtype(scale=6, precision=10),
+            ["-99.9", "-199.8"],
+            cudf.Decimal128Dtype(scale=6, precision=19),
+        ),
+        (
+            "rmul",
+            ["1000", "2000"],
+            cudf.Decimal64Dtype(scale=-3, precision=4),
+            ["0.343", "0.500"],
+            cudf.Decimal64Dtype(scale=3, precision=3),
+            ["343.0", "1000.0"],
+            cudf.Decimal64Dtype(scale=0, precision=8),
+        ),
+        (
+            "rtruediv",
+            ["1.5", "0.5"],
+            cudf.Decimal64Dtype(scale=3, precision=6),
+            ["1.5", "2.0"],
+            cudf.Decimal64Dtype(scale=3, precision=6),
+            ["1.0", "4.0"],
+            cudf.Decimal64Dtype(scale=10, precision=16),
+        ),
+    ],
+)
+def test_binops_reflect_decimal(
+    op, lhs, l_dtype, rhs, r_dtype, expect, expect_dtype
+):
+
+    a = utils._decimal_series(lhs, l_dtype)
+    b = utils._decimal_series(rhs, r_dtype)
+    expect = utils._decimal_series(expect, expect_dtype)
+
+    got = getattr(a, op)(b)
+    assert expect.dtype == got.dtype
+    utils.assert_eq(expect, got)
+
+
+def test_binops_raise_error():
+    s = cudf.Series([decimal.Decimal("1.324324")])
+    with pytest.raises(TypeError):
+        s**1
+    with pytest.raises(TypeError):
+        s // 1
+
+
 @pytest.mark.parametrize(
     "args",
     [

From a8c0f4be674bb782ccd8739481479fcfa2982502 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Wed, 16 Nov 2022 11:31:57 +0000
Subject: [PATCH 172/202] Fix type promotion edge cases in numerical binops
 (#12074)

The type normalisation applied before heading into libcudf previously had slightly unexpected consequences for large int64 values. When the operand was not a `cudf.Scalar`, a bare `int64` scalar would be cast to `uint64`, and normal numpy type promotion would then unify to `float64`. This is lossy, since the conversion from int64 to float64 is neither surjective nor injective.

To avoid this, try very hard to maintain the dtype of the object coming in, and match pandas behaviour by applying numpy type promotion rules via `numpy.result_type`.

- Closes #5938
- Closes #7389
- Closes #12072
- Closes #12092

Authors:
  - Lawrence Mitchell (https://github.com/wence-)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/12074
---
 python/cudf/cudf/core/column/column.py    |   3 +-
 python/cudf/cudf/core/column/numerical.py |  53 +++++----
 python/cudf/cudf/core/column/timedelta.py |  10 +-
 python/cudf/cudf/core/index.py            |   5 +-
 python/cudf/cudf/core/series.py           |   5 +-
 python/cudf/cudf/tests/test_binops.py     |  61 ++++++++++
 python/cudf/cudf/tests/test_series.py     |   6 +
 python/cudf/cudf/tests/test_timedelta.py  | 135 ++++++++++++++++++++--
 8 files changed, 236 insertions(+), 42 deletions(-)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 6c17b492f8a..59851a1c11b 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -502,7 +502,8 @@ def _wrap_binop_normalization(self, other):
         if other is NA or other is None:
             return cudf.Scalar(other, dtype=self.dtype)
         if isinstance(other, np.ndarray) and other.ndim == 0:
-            other = other.item()
+            # Try and maintain the dtype
+            other = other.dtype.type(other.item())
         return self.normalize_binop_value(other)
 
     def _scatter_by_slice(
diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py
index f126f47c3c2..7943135afe1 100644
--- a/python/cudf/cudf/core/column/numerical.py
+++ b/python/cudf/cudf/core/column/numerical.py
@@ -35,7 +35,7 @@
     is_number,
     is_scalar,
 )
-from cudf.core.buffer import Buffer, as_buffer, cuda_array_interface_wrapper
+from cudf.core.buffer import Buffer, cuda_array_interface_wrapper
 from cudf.core.column import (
     ColumnBase,
     as_column,
@@ -225,10 +225,18 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
                     (tmp.dtype.type in int_float_dtype_mapping)
                     and (tmp.dtype.type != np.bool_)
                     and (
-                        (np.isscalar(tmp) and (0 == tmp))
-                        or (
-                            (isinstance(tmp, NumericalColumn)) and (0.0 in tmp)
+                        (
+                            (
+                                np.isscalar(tmp)
+                                or (
+                                    isinstance(tmp, cudf.Scalar)
+                                    # host to device copy
+                                    and tmp.is_valid()
+                                )
+                            )
+                            and (0 == tmp)
                         )
+                        or ((isinstance(tmp, NumericalColumn)) and (0 in tmp))
                     )
                 ):
                     out_dtype = cudf.dtype("float64")
@@ -274,7 +282,7 @@ def nans_to_nulls(self: NumericalColumn) -> NumericalColumn:
 
     def normalize_binop_value(
         self, other: ScalarLike
-    ) -> Union[ColumnBase, ScalarLike]:
+    ) -> Union[ColumnBase, cudf.Scalar]:
         if isinstance(other, ColumnBase):
             if not isinstance(other, NumericalColumn):
                 return NotImplemented
@@ -285,25 +293,24 @@ def normalize_binop_value(
             # expensive device-host transfer just to
             # adjust the dtype
             other = other.value
-        other_dtype = np.min_scalar_type(other)
-        if other_dtype.kind in {"b", "i", "u", "f"}:
-            if isinstance(other, cudf.Scalar):
-                return other
-            other_dtype = np.promote_types(self.dtype, other_dtype)
-            if other_dtype == np.dtype("float16"):
-                other_dtype = cudf.dtype("float32")
-                other = other_dtype.type(other)
+        # Try to match pandas, and hence numpy: deduce the common
+        # dtype from the _value_ of other and the dtype of self.
+        # TODO: When NEP50 is accepted, this may need to be changed
+        # or simplified.
+        # This is not at all simple:
+        # np.result_type(np.int64(0), np.uint8)
+        #   => np.uint8
+        # np.result_type(np.asarray([0], dtype=np.int64), np.uint8)
+        #   => np.int64
+        # np.promote_types(np.int64(0), np.uint8)
+        #   => np.int64
+        # np.promote_types(np.asarray([0], dtype=np.int64).dtype, np.uint8)
+        #   => np.int64
+        common_dtype = np.result_type(self.dtype, other)
+        if common_dtype.kind in {"b", "i", "u", "f"}:
             if self.dtype.kind == "b":
-                other_dtype = min_signed_type(other)
-            if np.isscalar(other):
-                return cudf.dtype(other_dtype).type(other)
-            else:
-                ary = full(len(self), other, dtype=other_dtype)
-                return column.build_column(
-                    data=as_buffer(ary),
-                    dtype=ary.dtype,
-                    mask=self.mask,
-                )
+                common_dtype = min_signed_type(other)
+            return cudf.Scalar(other, dtype=common_dtype)
         else:
             return NotImplemented
 
diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py
index 3dc923e7ded..901547d94a9 100644
--- a/python/cudf/cudf/core/column/timedelta.py
+++ b/python/cudf/cudf/core/column/timedelta.py
@@ -181,17 +181,17 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
                 out_dtype = determine_out_dtype(self.dtype, other.dtype)
             elif op in {"__truediv__", "__floordiv__"}:
                 common_dtype = determine_out_dtype(self.dtype, other.dtype)
-                this = self.astype(common_dtype).astype("float64")
+                out_dtype = np.float64 if op == "__truediv__" else np.int64
+                this = self.astype(common_dtype).astype(out_dtype)
                 if isinstance(other, cudf.Scalar):
                     if other.is_valid():
                         other = other.value.astype(common_dtype).astype(
-                            "float64"
+                            out_dtype
                         )
                     else:
-                        other = cudf.Scalar(None, "float64")
+                        other = cudf.Scalar(None, out_dtype)
                 else:
-                    other = other.astype(common_dtype).astype("float64")
-                out_dtype = np.float64 if op == "__truediv__" else np.int64
+                    other = other.astype(common_dtype).astype(out_dtype)
             elif op in {"__add__", "__sub__"}:
                 out_dtype = determine_out_dtype(self.dtype, other.dtype)
         elif other.dtype.kind in {"f", "i", "u"}:
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 61971e3c749..e561dd0a214 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -2072,7 +2072,10 @@ def microsecond(self):
         """  # noqa: E501
         return as_index(
             (
-                self._values.get_dt_field("millisecond")
+                # Need to manually promote column to int32 because
+                # pandas-matching binop behaviour would otherwise make
+                # this __mul__ return an int16 column.
+                self._values.get_dt_field("millisecond").astype("int32")
                 * cudf.Scalar(1000, dtype="int32")
             )
             + self._values.get_dt_field("microsecond"),
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 873bebf1292..8f4f6fe57d6 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -3660,7 +3660,10 @@ def microsecond(self):
         """
         return Series(
             data=(
-                self.series._column.get_dt_field("millisecond")
+                # Need to manually promote column to int32 because
+                # pandas-matching binop behaviour would otherwise make
+                # this __mul__ return an int16 column.
+                self.series._column.get_dt_field("millisecond").astype("int32")
                 * cudf.Scalar(1000, dtype="int32")
             )
             + self.series._column.get_dt_field("microsecond"),
diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py
index c8b8dfa1e60..8337084be72 100644
--- a/python/cudf/cudf/tests/test_binops.py
+++ b/python/cudf/cudf/tests/test_binops.py
@@ -877,6 +877,67 @@ def test_binop_bool_uint(func, rhs):
     )
 
 
+@pytest.mark.parametrize(
+    "series_dtype", (np.bool_, np.int8, np.uint8, np.int64, np.uint64)
+)
+@pytest.mark.parametrize(
+    "divisor_dtype",
+    (
+        pytest.param(
+            np.bool_,
+            marks=pytest.mark.xfail(
+                reason=(
+                    "Pandas handling of division by zero-bool is too strange"
+                )
+            ),
+        ),
+        np.int8,
+        np.uint8,
+        np.int64,
+        np.uint64,
+    ),
+)
+@pytest.mark.parametrize("scalar_divisor", [False, True])
+def test_floordiv_zero_float64(series_dtype, divisor_dtype, scalar_divisor):
+    sr = pd.Series([1, 2, 3], dtype=series_dtype)
+    cr = cudf.from_pandas(sr)
+
+    if scalar_divisor:
+        pd_div = divisor_dtype(0)
+        cudf_div = cudf.Scalar(0, dtype=divisor_dtype)
+    else:
+        pd_div = pd.Series([0], dtype=divisor_dtype)
+        cudf_div = cudf.from_pandas(pd_div)
+    utils.assert_eq((sr // pd_div), (cr // cudf_div))
+
+
+@pytest.mark.parametrize(
+    "dtype",
+    (
+        pytest.param(
+            np.bool_,
+            marks=pytest.mark.xfail(
+                reason=(
+                    "Pandas handling of division by zero-bool is too strange"
+                )
+            ),
+        ),
+        np.int8,
+        np.uint8,
+        np.int64,
+        np.uint64,
+        np.float32,
+        np.float64,
+    ),
+)
+def test_rmod_zero_nan(dtype):
+    sr = pd.Series([1, 1, 0], dtype=dtype)
+    cr = cudf.from_pandas(sr)
+    utils.assert_eq(1 % sr, 1 % cr)
+    expected_dtype = np.float64 if cr.dtype.kind != "f" else dtype
+    utils.assert_eq(1 % cr, cudf.Series([0, 0, None], dtype=expected_dtype))
+
+
 def test_series_misc_binop():
     pds = pd.Series([1, 2, 4], name="abc xyz")
     gds = cudf.Series([1, 2, 4], name="abc xyz")
diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py
index c902bcb8b47..2525f055738 100644
--- a/python/cudf/cudf/tests/test_series.py
+++ b/python/cudf/cudf/tests/test_series.py
@@ -1995,6 +1995,12 @@ def test_set_bool_error(dtype, bool_scalar):
     )
 
 
+def test_int64_equality():
+    s = cudf.Series(np.asarray([2**63 - 10, 2**63 - 100], dtype=np.int64))
+    assert (s != np.int64(2**63 - 1)).all()
+    assert (s != cudf.Scalar(2**63 - 1, dtype=np.int64)).all()
+
+
 @pytest.mark.parametrize("into", [dict, OrderedDict, defaultdict(list)])
 def test_series_to_dict(into):
     gs = cudf.Series(["ab", "de", "zx"], index=[10, 20, 100])
diff --git a/python/cudf/cudf/tests/test_timedelta.py b/python/cudf/cudf/tests/test_timedelta.py
index 23270875a92..c1b603e34f2 100644
--- a/python/cudf/cudf/tests/test_timedelta.py
+++ b/python/cudf/cudf/tests/test_timedelta.py
@@ -400,12 +400,7 @@ def test_timedelta_dataframe_ops(df, op):
         [1],
         [12, 11, 232, 223432411, 2343241, 234324, 23234],
         [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234],
-        pytest.param(
-            [1.321, 1132.324, 23223231.11, 233.41, 0.2434, 332, 323],
-            marks=pytest.mark.xfail(
-                reason="https://github.com/rapidsai/cudf/issues/5938"
-            ),
-        ),
+        [1.321, 1132.324, 23223231.11, 233.41, 332, 323],
         [12, 11, 2.32, 2234.32411, 2343.241, 23432.4, 23234],
     ],
 )
@@ -492,6 +487,36 @@ def test_timedelta_series_ops_with_scalars(data, other_scalars, dtype, op):
     assert_eq(expected, actual)
 
 
+@pytest.mark.parametrize(
+    "reverse",
+    [
+        False,
+        pytest.param(
+            True,
+            marks=pytest.mark.xfail(
+                strict=True,
+                reason=(
+                    "timedelta modulo by zero is dubiously defined in "
+                    "both pandas and cuDF "
+                    "(see https://github.com/rapidsai/cudf/issues/5938)"
+                ),
+            ),
+        ),
+    ],
+)
+def test_timedelta_series_mod_with_scalar_zero(reverse):
+    gsr = cudf.Series(data=[0.2434], dtype=np.timedelta64(1, "ns"))
+    psr = gsr.to_pandas()
+    scalar = datetime.timedelta(days=768)
+    if reverse:
+        expected = scalar % psr
+        actual = scalar % gsr
+    else:
+        expected = psr % scalar
+        actual = gsr % scalar
+    assert_eq(expected, actual)
+
+
 @pytest.mark.parametrize(
     "data",
     [
@@ -597,6 +622,37 @@ def test_timedelta_series_ops_with_cudf_scalars(data, cpu_scalar, dtype, op):
     assert_eq(expected, actual)
 
 
+@pytest.mark.parametrize(
+    "reverse",
+    [
+        False,
+        pytest.param(
+            True,
+            marks=pytest.mark.xfail(
+                strict=True,
+                reason=(
+                    "timedelta modulo by zero is dubiously defined in "
+                    "both pandas and cuDF "
+                    "(see https://github.com/rapidsai/cudf/issues/5938)"
+                ),
+            ),
+        ),
+    ],
+)
+def test_timedelta_series_mod_with_cudf_scalar_zero(reverse):
+    gsr = cudf.Series(data=[0.2434], dtype=np.timedelta64(1, "ns"))
+    psr = gsr.to_pandas()
+    scalar = datetime.timedelta(days=768)
+    gpu_scalar = cudf.Scalar(scalar)
+    if reverse:
+        expected = scalar % psr
+        actual = gpu_scalar % gsr
+    else:
+        expected = psr % scalar
+        actual = gsr % gpu_scalar
+    assert_eq(expected, actual)
+
+
 @pytest.mark.parametrize(
     "data",
     [
@@ -812,7 +868,8 @@ def test_timedelta_datetime_index_ops_misc(
         pytest.param(
             "floordiv",
             marks=pytest.mark.xfail(
-                reason="https://github.com/pandas-dev/pandas/issues/35529"
+                condition=not PANDAS_GE_120,
+                reason="https://github.com/pandas-dev/pandas/issues/35529",
             ),
         ),
     ],
@@ -850,7 +907,35 @@ def test_timedelta_index_ops_with_scalars(data, other_scalars, dtype, op):
         expected = other_scalars // ptdi
         actual = other_scalars // gtdi
 
-    assert_eq(expected, actual)
+    if op == "floordiv":
+        # Hand-coding pytest.xfail behaviour for certain combinations
+        if (
+            0 in ptdi.astype("int")
+            and np.timedelta64(other_scalars).item() is not None
+        ):
+            with pytest.raises(AssertionError):
+                # Related to https://github.com/rapidsai/cudf/issues/5938
+                #
+                # Division by zero for datetime or timedelta is
+                # dubiously defined in both pandas (Any // 0 -> 0 in
+                # pandas) and cuDF (undefined behaviour)
+                assert_eq(expected, actual)
+        elif (
+            (None not in ptdi)
+            and np.nan not in expected
+            and (
+                expected.astype("float64").astype("int64")
+                != expected.astype("int64")
+            ).any()
+        ):
+            with pytest.raises(AssertionError):
+                # Incorrect implementation of floordiv in cuDF:
+                # https://github.com/rapidsai/cudf/issues/12120
+                assert_eq(expected, actual)
+        else:
+            assert_eq(expected, actual)
+    else:
+        assert_eq(expected, actual)
 
 
 @pytest.mark.parametrize("data", _TIMEDELTA_DATA_NON_OVERFLOW)
@@ -876,12 +961,12 @@ def test_timedelta_index_ops_with_scalars(data, other_scalars, dtype, op):
         pytest.param(
             "floordiv",
             marks=pytest.mark.xfail(
-                reason="https://github.com/rapidsai/cudf/issues/5938"
+                condition=not PANDAS_GE_120,
+                reason="https://github.com/pandas-dev/pandas/issues/35529",
             ),
         ),
     ],
 )
-@pytest.mark.filterwarnings("ignore:divide by zero:RuntimeWarning:pandas")
 def test_timedelta_index_ops_with_cudf_scalars(data, cpu_scalar, dtype, op):
     gtdi = cudf.Index(data=data, dtype=dtype)
     ptdi = gtdi.to_pandas()
@@ -916,7 +1001,35 @@ def test_timedelta_index_ops_with_cudf_scalars(data, cpu_scalar, dtype, op):
         expected = cpu_scalar // ptdi
         actual = gpu_scalar // gtdi
 
-    assert_eq(expected, actual)
+    if op == "floordiv":
+        # Hand-coding pytest.xfail behaviour for certain combinations
+        if (
+            0 in ptdi.astype("int")
+            and np.timedelta64(cpu_scalar).item() is not None
+        ):
+            with pytest.raises(AssertionError):
+                # Related to https://github.com/rapidsai/cudf/issues/5938
+                #
+                # Division by zero for datetime or timedelta is
+                # dubiously defined in both pandas (Any // 0 -> 0 in
+                # pandas) and cuDF (undefined behaviour)
+                assert_eq(expected, actual)
+        elif (
+            (None not in ptdi)
+            and np.nan not in expected
+            and (
+                expected.astype("float64").astype("int64")
+                != expected.astype("int64")
+            ).any()
+        ):
+            with pytest.raises(AssertionError):
+                # Incorrect implementation of floordiv in cuDF:
+                # https://github.com/rapidsai/cudf/issues/12120
+                assert_eq(expected, actual)
+        else:
+            assert_eq(expected, actual)
+    else:
+        assert_eq(expected, actual)
 
 
 @pytest.mark.parametrize("data", _TIMEDELTA_DATA)

From 38235de0c0b4038468fbbe5ca703b762f3d1b717 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Wed, 16 Nov 2022 06:34:38 -0800
Subject: [PATCH 173/202] pin dask

---
 ci/benchmark/build.sh                            | 4 ++--
 ci/cpu/build.sh                                  | 2 +-
 ci/gpu/build.sh                                  | 4 ++--
 conda/environments/all_cuda-115_arch-x86_64.yaml | 4 ++--
 conda/recipes/custreamz/meta.yaml                | 4 ++--
 conda/recipes/dask-cudf/meta.yaml                | 8 ++++----
 dependencies.yaml                                | 4 ++--
 python/dask_cudf/setup.py                        | 4 ++--
 8 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/ci/benchmark/build.sh b/ci/benchmark/build.sh
index 0ea39a0b365..011f947beb0 100755
--- a/ci/benchmark/build.sh
+++ b/ci/benchmark/build.sh
@@ -37,10 +37,10 @@ export GBENCH_BENCHMARKS_DIR="$WORKSPACE/cpp/build/gbenchmarks/"
 export LIBCUDF_KERNEL_CACHE_PATH="$HOME/.jitify-cache"
 
 # Dask & Distributed option to install main(nightly) or `conda-forge` packages.
-export INSTALL_DASK_MAIN=1
+export INSTALL_DASK_MAIN=0
 
 # Dask version to install when `INSTALL_DASK_MAIN=0`
-export DASK_STABLE_VERSION="2022.9.2"
+export DASK_STABLE_VERSION="2022.11.0"
 
 function remove_libcudf_kernel_cache_dir {
     EXITCODE=$?
diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh
index ada69fe0923..2e12308169f 100755
--- a/ci/cpu/build.sh
+++ b/ci/cpu/build.sh
@@ -28,7 +28,7 @@ export CONDA_BLD_DIR="$WORKSPACE/.conda-bld"
 
 # Whether to keep `dask/label/dev` channel in the env. If INSTALL_DASK_MAIN=0,
 # `dask/label/dev` channel is removed.
-export INSTALL_DASK_MAIN=1
+export INSTALL_DASK_MAIN=0
 
 # Switch to project root; also root of repo checkout
 cd "$WORKSPACE"
diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh
index 500c3bdbcc5..2b3e4147d89 100755
--- a/ci/gpu/build.sh
+++ b/ci/gpu/build.sh
@@ -32,10 +32,10 @@ export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'`
 unset GIT_DESCRIBE_TAG
 
 # Dask & Distributed option to install main(nightly) or `conda-forge` packages.
-export INSTALL_DASK_MAIN=1
+export INSTALL_DASK_MAIN=0
 
 # Dask version to install when `INSTALL_DASK_MAIN=0`
-export DASK_STABLE_VERSION="2022.9.2"
+export DASK_STABLE_VERSION="2022.11.0"
 
 # ucx-py version
 export UCX_PY_VERSION='0.29.*'
diff --git a/conda/environments/all_cuda-115_arch-x86_64.yaml b/conda/environments/all_cuda-115_arch-x86_64.yaml
index a7e5f1a04a6..2e52b86b705 100644
--- a/conda/environments/all_cuda-115_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-115_arch-x86_64.yaml
@@ -21,8 +21,8 @@ dependencies:
 - cxx-compiler
 - cython>=0.29,<0.30
 - dask-cuda=22.12.*
-- dask>=2022.9.2
-- distributed>=2022.9.2
+- dask==2022.11.0
+- distributed==2022.11.0
 - dlpack>=0.5,<0.6.0a0
 - doxygen=1.8.20
 - fastavro>=0.22.9
diff --git a/conda/recipes/custreamz/meta.yaml b/conda/recipes/custreamz/meta.yaml
index 989f8855533..19d9728b234 100644
--- a/conda/recipes/custreamz/meta.yaml
+++ b/conda/recipes/custreamz/meta.yaml
@@ -29,8 +29,8 @@ requirements:
     - python
     - streamz
     - cudf ={{ version }}
-    - dask>=2022.9.2
-    - distributed>=2022.9.2
+    - dask==2022.11.0
+    - distributed==2022.11.0
     - python-confluent-kafka >=1.7.0,<1.8.0a0
     - cudf_kafka ={{ version }}
 
diff --git a/conda/recipes/dask-cudf/meta.yaml b/conda/recipes/dask-cudf/meta.yaml
index 1c718c0e995..8f83053eab5 100644
--- a/conda/recipes/dask-cudf/meta.yaml
+++ b/conda/recipes/dask-cudf/meta.yaml
@@ -24,14 +24,14 @@ requirements:
   host:
     - python
     - cudf ={{ version }}
-    - dask>=2022.9.2
-    - distributed>=2022.9.2
+    - dask==2022.11.0
+    - distributed==2022.11.0
     - cudatoolkit ={{ cuda_version }}
   run:
     - python
     - cudf ={{ version }}
-    - dask>=2022.9.2
-    - distributed>=2022.9.2
+    - dask==2022.11.0
+    - distributed==2022.11.0
     - {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }}
 
 test:                                   # [linux64]
diff --git a/dependencies.yaml b/dependencies.yaml
index b8470f02f86..3ba21139287 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -141,8 +141,8 @@ dependencies:
       - output_types: [conda, requirements]
         packages:
           - cachetools
-          - dask>=2022.9.2
-          - distributed>=2022.9.2
+          - dask==2022.11.0
+          - distributed==2022.11.0
           - fsspec>=0.6.0
           - numba>=0.56.2
           - numpy
diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py
index 4fa2af89b9d..aaa3635efd7 100644
--- a/python/dask_cudf/setup.py
+++ b/python/dask_cudf/setup.py
@@ -9,8 +9,8 @@
 
 install_requires = [
     "cudf",
-    "dask>=2022.9.2",
-    "distributed>=2022.9.2",
+    "dask==2022.11.0",
+    "distributed==2022.11.0",
     "fsspec>=0.6.0",
     "numpy",
     "pandas>=1.0,<1.6.0dev0",

From 7adf22946d548628d275ebd3afc77cbf8fca64dc Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Wed, 16 Nov 2022 10:22:20 -0600
Subject: [PATCH 174/202] Update build.sh

---
 ci/gpu/build.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh
index 2b3e4147d89..e33fb89ab75 100755
--- a/ci/gpu/build.sh
+++ b/ci/gpu/build.sh
@@ -101,7 +101,7 @@ function install_dask {
         conda list
     else
         gpuci_logger "gpuci_mamba_retry install conda-forge::dask=={$DASK_STABLE_VERSION} conda-forge::distributed=={$DASK_STABLE_VERSION} conda-forge::dask-core=={$DASK_STABLE_VERSION} --force-reinstall"
-        gpuci_mamba_retry install conda-forge::dask=={$DASK_STABLE_VERSION} conda-forge::distributed=={$DASK_STABLE_VERSION} conda-forge::dask-core=={$DASK_STABLE_VERSION} --force-reinstall
+        gpuci_mamba_retry install conda-forge::dask==$DASK_STABLE_VERSION conda-forge::distributed==$DASK_STABLE_VERSION conda-forge::dask-core==$DASK_STABLE_VERSION --force-reinstall
     fi
     # Install the main version of streamz
     gpuci_logger "Install the main version of streamz"

From 742093e3af975c297ce2ed2d9e6c14df2b464a3b Mon Sep 17 00:00:00 2001
From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Wed, 16 Nov 2022 10:59:05 -0600
Subject: [PATCH 175/202] Support `+` in `strings_udf` (#12117)

This PR adds support for the following operator in `strings_udf`:

- `st + other`

Part of https://github.com/rapidsai/cudf/issues/9639

Authors:
  - https://github.com/brandon-b-miller
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/12117
---
 python/cudf/cudf/core/udf/strings_typing.py   | 10 +++++++
 python/cudf/cudf/tests/test_udf_masked_ops.py |  9 ++++++
 .../strings_udf/cpp/src/strings/udf/shim.cu   | 13 +++++++++
 python/strings_udf/strings_udf/_typing.py     |  5 ++++
 python/strings_udf/strings_udf/lowering.py    | 28 +++++++++++++++++++
 .../strings_udf/tests/test_string_udfs.py     | 16 +++++++++++
 6 files changed, 81 insertions(+)

diff --git a/python/cudf/cudf/core/udf/strings_typing.py b/python/cudf/cudf/core/udf/strings_typing.py
index f8f50600b12..e8a35c12f71 100644
--- a/python/cudf/cudf/core/udf/strings_typing.py
+++ b/python/cudf/cudf/core/udf/strings_typing.py
@@ -59,6 +59,16 @@ def len_typing(self, args, kws):
         return nb_signature(size_type, args[0])
 
 
+@register_string_function(operator.add)
+def concat_typing(self, args, kws):
+    if _is_valid_string_arg(args[0]) and _is_valid_string_arg(args[1]):
+        return nb_signature(
+            MaskedType(udf_string),
+            MaskedType(string_view),
+            MaskedType(string_view),
+        )
+
+
 @register_string_function(operator.contains)
 def contains_typing(self, args, kws):
     if _is_valid_string_arg(args[0]) and _is_valid_string_arg(args[1]):
diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py
index 7af47f981d6..fbe6b3f8888 100644
--- a/python/cudf/cudf/tests/test_udf_masked_ops.py
+++ b/python/cudf/cudf/tests/test_udf_masked_ops.py
@@ -903,6 +903,15 @@ def func(row):
     run_masked_udf_test(func, str_udf_data, check_dtype=False)
 
 
+@string_udf_test
+@pytest.mark.parametrize("concat_char", ["1", "a", "12", " ", "", ".", "@"])
+def test_string_udf_concat(str_udf_data, concat_char):
+    def func(row):
+        return row["str_col"] + concat_char
+
+    run_masked_udf_test(func, str_udf_data, check_dtype=False)
+
+
 @pytest.mark.parametrize(
     "data", [[1.0, 0.0, 1.5], [1, 0, 2], [True, False, True]]
 )
diff --git a/python/strings_udf/cpp/src/strings/udf/shim.cu b/python/strings_udf/cpp/src/strings/udf/shim.cu
index 21998d59bbb..8fc158d7eb7 100644
--- a/python/strings_udf/cpp/src/strings/udf/shim.cu
+++ b/python/strings_udf/cpp/src/strings/udf/shim.cu
@@ -270,3 +270,16 @@ extern "C" __device__ int rstrip(int* nb_retval,
 
   return 0;
 }
+
+extern "C" __device__ int concat(int* nb_retval, void* udf_str, void* const* lhs, void* const* rhs)
+{
+  auto lhs_ptr = reinterpret_cast<cudf::string_view const*>(lhs);
+  auto rhs_ptr = reinterpret_cast<cudf::string_view const*>(rhs);
+
+  auto udf_str_ptr = new (udf_str) udf_string;
+
+  udf_string result;
+  result.append(*lhs_ptr).append(*rhs_ptr);
+  *udf_str_ptr = result;
+  return 0;
+}
diff --git a/python/strings_udf/strings_udf/_typing.py b/python/strings_udf/strings_udf/_typing.py
index a309a9cb93c..b678db88b95 100644
--- a/python/strings_udf/strings_udf/_typing.py
+++ b/python/strings_udf/strings_udf/_typing.py
@@ -159,8 +159,13 @@ def generic(self, args, kws):
 register_stringview_binaryop(operator.gt, types.boolean)
 register_stringview_binaryop(operator.le, types.boolean)
 register_stringview_binaryop(operator.ge, types.boolean)
+
+# st in other
 register_stringview_binaryop(operator.contains, types.boolean)
 
+# st + other
+register_stringview_binaryop(operator.add, udf_string)
+
 
 def create_binary_attr(attrname, retty):
     """
diff --git a/python/strings_udf/strings_udf/lowering.py b/python/strings_udf/strings_udf/lowering.py
index a6d43ece1c5..9e34b61e6da 100644
--- a/python/strings_udf/strings_udf/lowering.py
+++ b/python/strings_udf/strings_udf/lowering.py
@@ -25,6 +25,9 @@
 # CUDA function declarations
 # read-only (input is a string_view, output is a fixed with type)
 _string_view_len = cuda.declare_device("len", size_type(_STR_VIEW_PTR))
+_concat_string_view = cuda.declare_device(
+    "concat", types.void(_UDF_STRING_PTR, _STR_VIEW_PTR, _STR_VIEW_PTR)
+)
 
 
 def _declare_binary_func(lhs, rhs, out, name):
@@ -160,6 +163,31 @@ def len_impl(context, builder, sig, args):
     return result
 
 
+def call_concat_string_view(result, lhs, rhs):
+    return _concat_string_view(result, lhs, rhs)
+
+
+@cuda_lower(operator.add, string_view, string_view)
+def concat_impl(context, builder, sig, args):
+    lhs_ptr = builder.alloca(args[0].type)
+    rhs_ptr = builder.alloca(args[1].type)
+    builder.store(args[0], lhs_ptr)
+    builder.store(args[1], rhs_ptr)
+
+    udf_str_ptr = builder.alloca(default_manager[udf_string].get_value_type())
+    _ = context.compile_internal(
+        builder,
+        call_concat_string_view,
+        types.void(_UDF_STRING_PTR, _STR_VIEW_PTR, _STR_VIEW_PTR),
+        (udf_str_ptr, lhs_ptr, rhs_ptr),
+    )
+
+    result = cgutils.create_struct_proxy(udf_string)(
+        context, builder, value=builder.load(udf_str_ptr)
+    )
+    return result._getvalue()
+
+
 def create_binary_string_func(binary_func, retty):
     """
     Provide a wrapper around numba's low-level extension API which
diff --git a/python/strings_udf/strings_udf/tests/test_string_udfs.py b/python/strings_udf/strings_udf/tests/test_string_udfs.py
index 522433d404f..49663ee02ec 100644
--- a/python/strings_udf/strings_udf/tests/test_string_udfs.py
+++ b/python/strings_udf/strings_udf/tests/test_string_udfs.py
@@ -302,3 +302,19 @@ def func(st):
         return st.rstrip(strip_char)
 
     run_udf_test(data, func, "str")
+
+
+@pytest.mark.parametrize("concat_char", ["1", "a", "12", " ", "", ".", "@"])
+def test_string_udf_concat(data, concat_char):
+    def func(st):
+        return st + concat_char
+
+    run_udf_test(data, func, "str")
+
+
+@pytest.mark.parametrize("concat_char", ["1", "a", "12", " ", "", ".", "@"])
+def test_string_udf_concat_reflected(data, concat_char):
+    def func(st):
+        return concat_char + st
+
+    run_udf_test(data, func, "str")

From 6ad57524ed6ab5037755491839dab53f00b8158a Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Wed, 16 Nov 2022 11:00:21 -0600
Subject: [PATCH 176/202] Use rapidsai CODE_OF_CONDUCT.md (#12166)

This repo's `CODE_OF_CONDUCT.md` is superseded by an organization-wide policy: https://github.com/rapidsai/.github/pull/3

Authors:
  - Bradley Dice (https://github.com/bdice)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/12166
---
 CODE_OF_CONDUCT.md | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 CODE_OF_CONDUCT.md

diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
deleted file mode 100644
index 563581d270d..00000000000
--- a/CODE_OF_CONDUCT.md
+++ /dev/null
@@ -1 +0,0 @@
-This project has adopted the [Contributor Covenant Code of Conduct](https://docs.rapids.ai/resources/conduct/).

From defad5eac490a90be5525c578ce954b2095f2812 Mon Sep 17 00:00:00 2001
From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com>
Date: Wed, 16 Nov 2022 23:43:44 +0530
Subject: [PATCH 177/202] byte_range support for JSON Lines format (#12017)

This PR adds support for byte_range to be used in the nested JSON parser for the JSON Lines format (newline-delimited JSON, http://ndjson.org/).
The record delimiter (newline) is expected only at the end of each record. Newlines in the middle of a record or within quotes are not expected and will lead to unknown behaviour. The record delimiters are not context-aware in this PR.

This PR provides libcudf APIs, Cython APIs and python tests to enable byte range support. This will allow dask to do distributed/segmented parsing of JSON.

No Dask changes

Addresses part of https://github.com/rapidsai/cudf/issues/11843
Depends on  #12060

Authors:
  - Karthikeyan (https://github.com/karthikeyann)

Approvers:
  - Elias Stehle (https://github.com/elstehle)
  - Lawrence Mitchell (https://github.com/wence-)
  - Robert Maynard (https://github.com/robertmaynard)

URL: https://github.com/rapidsai/cudf/pull/12017
---
 cpp/CMakeLists.txt                            |   1 +
 .../io/json/experimental/byte_range_info.cu   |  36 +++++
 cpp/src/io/json/experimental/read_json.cpp    |  98 +++++++++++++-
 cpp/src/io/json/experimental/read_json.hpp    |  11 +-
 cpp/tests/CMakeLists.txt                      |   2 +-
 cpp/tests/io/json_chunked_reader.cpp          | 128 ++++++++++++++++++
 python/cudf/cudf/tests/test_json.py           |  61 +++++++--
 7 files changed, 318 insertions(+), 19 deletions(-)
 create mode 100644 cpp/src/io/json/experimental/byte_range_info.cu
 create mode 100644 cpp/tests/io/json_chunked_reader.cpp

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 7e8ee5b60bf..c52248c1eab 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -329,6 +329,7 @@ add_library(
   src/io/json/json_tree.cu
   src/io/json/nested_json_gpu.cu
   src/io/json/reader_impl.cu
+  src/io/json/experimental/byte_range_info.cu
   src/io/json/experimental/read_json.cpp
   src/io/orc/aggregate_orc_metadata.cpp
   src/io/orc/dict_enc.cu
diff --git a/cpp/src/io/json/experimental/byte_range_info.cu b/cpp/src/io/json/experimental/byte_range_info.cu
new file mode 100644
index 00000000000..d6e30d090a5
--- /dev/null
+++ b/cpp/src/io/json/experimental/byte_range_info.cu
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cudf/types.hpp>
+#include <cudf/utilities/span.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/exec_policy.hpp>
+#include <thrust/find.h>
+
+namespace cudf::io::detail::json::experimental {
+
+// Return the position of the first occurrence of `delimiter` in `d_data`, or -1 if not found.
+size_type find_first_delimiter(device_span<char const> d_data,
+                               char const delimiter,
+                               rmm::cuda_stream_view stream)
+{
+  auto const first_delimiter_position =
+    thrust::find(rmm::exec_policy(stream), d_data.begin(), d_data.end(), delimiter);
+  return first_delimiter_position != d_data.end() ? first_delimiter_position - d_data.begin() : -1;
+}
+
+}  // namespace cudf::io::detail::json::experimental
diff --git a/cpp/src/io/json/experimental/read_json.cpp b/cpp/src/io/json/experimental/read_json.cpp
index b0b7d5baa0f..87d196131ca 100644
--- a/cpp/src/io/json/experimental/read_json.cpp
+++ b/cpp/src/io/json/experimental/read_json.cpp
@@ -64,19 +64,105 @@ std::vector<uint8_t> ingest_raw_input(host_span<std::unique_ptr<datasource>> con
   }
 }
 
+size_type find_first_delimiter_in_chunk(host_span<std::unique_ptr<cudf::io::datasource>> sources,
+                                        json_reader_options const& reader_opts,
+                                        char const delimiter,
+                                        rmm::cuda_stream_view stream)
+{
+  auto const buffer = ingest_raw_input(sources,
+                                       reader_opts.get_compression(),
+                                       reader_opts.get_byte_range_offset(),
+                                       reader_opts.get_byte_range_size());
+  auto d_data       = rmm::device_uvector<char>(buffer.size(), stream);
+  CUDF_CUDA_TRY(cudaMemcpyAsync(d_data.data(),
+                                buffer.data(),
+                                buffer.size() * sizeof(decltype(buffer)::value_type),
+                                cudaMemcpyHostToDevice,
+                                stream.value()));
+  return find_first_delimiter(d_data, delimiter, stream);
+}
+
+size_type find_first_delimiter_in_chunk(host_span<unsigned char const> buffer,
+                                        char const delimiter,
+                                        rmm::cuda_stream_view stream)
+{
+  auto d_data = rmm::device_uvector<char>(buffer.size(), stream);
+  CUDF_CUDA_TRY(cudaMemcpyAsync(d_data.data(),
+                                buffer.data(),
+                                buffer.size() * sizeof(decltype(buffer)::value_type),
+                                cudaMemcpyHostToDevice,
+                                stream.value()));
+  return find_first_delimiter(d_data, delimiter, stream);
+}
+
+bool should_load_whole_source(json_reader_options const& reader_opts)
+{
+  return reader_opts.get_byte_range_offset() == 0 and  //
+         reader_opts.get_byte_range_size() == 0;
+}
+
+/**
+ * @brief Get the byte range between record starts and ends starting from the given range.
+ *
+ * If get_byte_range_offset == 0, the search for the first delimiter is skipped.
+ * If get_byte_range_offset != 0, the first delimiter is searched for in the given range; if none
+ * is found, this chunk is skipped. Otherwise, subsequent ranges are searched until the next
+ * delimiter (or the end of the source) is found; that span is the actual range used for parsing.
+ *
+ * @param sources Data sources to read from
+ * @param reader_opts JSON reader options with range offset and range size
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @return Byte range for parsing
+ */
+auto get_record_range_raw_input(host_span<std::unique_ptr<datasource>> sources,
+                                json_reader_options const& reader_opts,
+                                rmm::cuda_stream_view stream)
+{
+  auto buffer = ingest_raw_input(sources,
+                                 reader_opts.get_compression(),
+                                 reader_opts.get_byte_range_offset(),
+                                 reader_opts.get_byte_range_size());
+  if (should_load_whole_source(reader_opts)) return buffer;
+  auto first_delim_pos = reader_opts.get_byte_range_offset() == 0
+                           ? 0
+                           : find_first_delimiter_in_chunk(buffer, '\n', stream);
+  if (first_delim_pos == -1) {
+    return std::vector<uint8_t>{};
+  } else {
+    first_delim_pos = first_delim_pos + reader_opts.get_byte_range_offset();
+    // Find next delimiter
+    decltype(first_delim_pos) next_delim_pos = -1;
+    auto const total_source_size             = sources_size(sources, 0, 0);
+    auto current_offset = reader_opts.get_byte_range_offset() + reader_opts.get_byte_range_size();
+    while (current_offset < total_source_size and next_delim_pos == -1) {
+      buffer = ingest_raw_input(
+        sources, reader_opts.get_compression(), current_offset, reader_opts.get_byte_range_size());
+      next_delim_pos = find_first_delimiter_in_chunk(buffer, '\n', stream);
+      if (next_delim_pos == -1) { current_offset += reader_opts.get_byte_range_size(); }
+    }
+    if (next_delim_pos == -1) {
+      next_delim_pos = total_source_size;
+    } else {
+      next_delim_pos = next_delim_pos + current_offset;
+    }
+    return ingest_raw_input(
+      sources, reader_opts.get_compression(), first_delim_pos, next_delim_pos - first_delim_pos);
+  }
+}
+
 table_with_metadata read_json(host_span<std::unique_ptr<datasource>> sources,
                               json_reader_options const& reader_opts,
                               rmm::cuda_stream_view stream,
                               rmm::mr::device_memory_resource* mr)
 {
   CUDF_FUNC_RANGE();
-  CUDF_EXPECTS(reader_opts.get_byte_range_offset() == 0 and reader_opts.get_byte_range_size() == 0,
-               "specifying a byte range is not yet supported");
+  if (not should_load_whole_source(reader_opts)) {
+    CUDF_EXPECTS(reader_opts.is_enabled_lines(),
+                 "specifying a byte range is supported only for json lines");
+  }
+
+  auto const buffer = get_record_range_raw_input(sources, reader_opts, stream);
 
-  auto const buffer = ingest_raw_input(sources,
-                                       reader_opts.get_compression(),
-                                       reader_opts.get_byte_range_offset(),
-                                       reader_opts.get_byte_range_size());
   auto data = host_span<char const>(reinterpret_cast<char const*>(buffer.data()), buffer.size());
 
   try {
diff --git a/cpp/src/io/json/experimental/read_json.hpp b/cpp/src/io/json/experimental/read_json.hpp
index c9f74b2cc41..48e104c4254 100644
--- a/cpp/src/io/json/experimental/read_json.hpp
+++ b/cpp/src/io/json/experimental/read_json.hpp
@@ -33,4 +33,13 @@ table_with_metadata read_json(host_span<std::unique_ptr<datasource>> sources,
                               rmm::cuda_stream_view stream,
                               rmm::mr::device_memory_resource* mr);
 
-}
+size_type find_first_delimiter(device_span<char const> d_data,
+                               char const delimiter,
+                               rmm::cuda_stream_view stream);
+
+size_type find_first_delimiter_in_chunk(host_span<std::unique_ptr<cudf::io::datasource>> sources,
+                                        json_reader_options const& reader_opts,
+                                        char const delimiter,
+                                        rmm::cuda_stream_view stream);
+
+}  // namespace cudf::io::detail::json::experimental
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index 5ff2e9bf6d6..c602ccc7374 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -224,7 +224,7 @@ ConfigureTest(CSV_TEST io/csv_test.cpp)
 ConfigureTest(FILE_IO_TEST io/file_io_test.cpp)
 ConfigureTest(ORC_TEST io/orc_test.cpp)
 ConfigureTest(PARQUET_TEST io/parquet_test.cpp)
-ConfigureTest(JSON_TEST io/json_test.cpp)
+ConfigureTest(JSON_TEST io/json_test.cpp io/json_chunked_reader.cpp)
 ConfigureTest(JSON_TYPE_CAST_TEST io/json_type_cast_test.cu)
 ConfigureTest(NESTED_JSON_TEST io/nested_json_test.cpp io/json_tree.cpp)
 ConfigureTest(ARROW_IO_SOURCE_TEST io/arrow_io_source_test.cpp)
diff --git a/cpp/tests/io/json_chunked_reader.cpp b/cpp/tests/io/json_chunked_reader.cpp
new file mode 100644
index 00000000000..28b41c5691f
--- /dev/null
+++ b/cpp/tests/io/json_chunked_reader.cpp
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_utilities.hpp>
+#include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/cudf_gtest.hpp>
+#include <cudf_test/table_utilities.hpp>
+
+#include <io/json/experimental/read_json.hpp>
+
+/**
+ * @brief Base test fixture for JSON reader tests
+ */
+struct JsonReaderTest : public cudf::test::BaseFixture {
+};
+
+// function to extract first delimiter in the string in each chunk,
+// collate together and form byte_range for each chunk,
+// parse separately.
+std::vector<cudf::io::table_with_metadata> skeleton_for_parellel_chunk_reader(
+  cudf::host_span<std::unique_ptr<cudf::io::datasource>> sources,
+  cudf::io::json_reader_options const& reader_opts,
+  int32_t chunk_size,
+  rmm::cuda_stream_view stream,
+  rmm::mr::device_memory_resource* mr)
+{
+  using namespace cudf::io::detail::json::experimental;
+  using cudf::size_type;
+  // assuming single source.
+  size_t total_source_size = 0;
+  for (auto const& source : sources) {
+    total_source_size += source->size();
+  }
+  size_t num_chunks                = (total_source_size + chunk_size - 1) / chunk_size;
+  constexpr size_type no_min_value = -1;
+
+  // Get the first delimiter in each chunk.
+  std::vector<size_type> first_delimiter_index(num_chunks);
+  auto reader_opts_chunk = reader_opts;
+  for (size_t i = 0; i < num_chunks; i++) {
+    auto const chunk_start = i * chunk_size;
+    reader_opts_chunk.set_byte_range_offset(chunk_start);
+    reader_opts_chunk.set_byte_range_size(chunk_size);
+    first_delimiter_index[i] =
+      find_first_delimiter_in_chunk(sources, reader_opts_chunk, '\n', stream);
+    if (first_delimiter_index[i] != no_min_value) { first_delimiter_index[i] += chunk_start; }
+  }
+
+  // Process and allocate record start, end for each worker.
+  using record_range = std::pair<size_type, size_type>;
+  std::vector<record_range> record_ranges;
+  record_ranges.reserve(num_chunks);
+  first_delimiter_index[0] = 0;
+  auto prev                = first_delimiter_index[0];
+  for (size_t i = 1; i < num_chunks; i++) {
+    if (first_delimiter_index[i] == no_min_value) continue;
+    record_ranges.push_back({prev, first_delimiter_index[i]});
+    prev = first_delimiter_index[i];
+  }
+  record_ranges.push_back({prev, total_source_size});
+
+  std::vector<cudf::io::table_with_metadata> tables;
+  // Process each chunk in parallel.
+  for (auto const [chunk_start, chunk_end] : record_ranges) {
+    if (chunk_start == -1 or chunk_end == -1) continue;
+    reader_opts_chunk.set_byte_range_offset(chunk_start);
+    reader_opts_chunk.set_byte_range_size(chunk_end - chunk_start);
+    tables.push_back(read_json(sources, reader_opts_chunk, stream, mr));
+  }
+  // assume all records have same number of columns, and inferred same type. (or schema is passed)
+  // TODO a step before to merge all columns, types and infer final schema.
+  return tables;
+}
+
+TEST_F(JsonReaderTest, ByteRange)
+{
+  std::string const json_string = R"(
+    { "a": { "y" : 6}, "b" : [1, 2, 3], "c": 11 }
+    { "a": { "y" : 6}, "b" : [4, 5   ], "c": 12 }
+    { "a": { "y" : 6}, "b" : [6      ], "c": 13 }
+    { "a": { "y" : 6}, "b" : [7      ], "c": 14 })";
+
+  // Initialize parsing options (reading json lines)
+  cudf::io::json_reader_options json_lines_options =
+    cudf::io::json_reader_options::builder(
+      cudf::io::source_info{json_string.c_str(), json_string.size()})
+      .compression(cudf::io::compression_type::NONE)
+      .lines(true)
+      .experimental(true);
+
+  // Read full test data via existing, nested JSON lines reader
+  cudf::io::table_with_metadata current_reader_table = cudf::io::read_json(json_lines_options);
+
+  auto datasources = cudf::io::datasource::create(json_lines_options.get_source().buffers());
+
+  // Test for different chunk sizes
+  for (auto chunk_size : {7, 10, 15, 20, 40, 50, 100, 200, 500}) {
+    auto const tables = skeleton_for_parellel_chunk_reader(datasources,
+                                                           json_lines_options,
+                                                           chunk_size,
+                                                           cudf::get_default_stream(),
+                                                           rmm::mr::get_current_device_resource());
+
+    auto table_views = std::vector<cudf::table_view>(tables.size());
+    std::transform(tables.begin(), tables.end(), table_views.begin(), [](auto& table) {
+      return table.tbl->view();
+    });
+    auto result = cudf::concatenate(table_views);
+
+    // Verify that the data read via chunked reader matches the data read via nested JSON reader
+    // cannot use EQUAL due to concatenate removing null mask
+    CUDF_TEST_EXPECT_TABLES_EQUIVALENT(current_reader_table.tbl->view(), result->view());
+  }
+}
diff --git a/python/cudf/cudf/tests/test_json.py b/python/cudf/cudf/tests/test_json.py
index 14238be7bc1..2eda71c5c45 100644
--- a/python/cudf/cudf/tests/test_json.py
+++ b/python/cudf/cudf/tests/test_json.py
@@ -929,21 +929,60 @@ def test_json_dtypes_nested_data():
         (
             "missing",
             """
-            { "a": { "y" : 6}, "b" : [1, 2, 3], "c": 11 }
-            { "a": { "y" : 6}, "b" : [4, 5   ]}
-            { "a": { "y" : 6}, "c": 13 }
-            { "a": { "y" : 6}, "b" : [7      ], "c": 14 }
-        """,
+    { "a": { "y" : 6}, "b" : [1, 2, 3], "c": 11 }
+    { "a": { "y" : 6}, "b" : [4, 5   ]          }
+    { "a": { "y" : 6}, "c": 13                  }
+    { "a": { "y" : 6}, "b" : [7      ], "c": 14 }
+""",
+        ),
+        pytest.param(
+            "dtype_mismatch",
+            """\
+    { "a": { "y" : 6}, "b" : [1, 2, 3], "c": 11 }
+    { "a": { "y" : 6}, "b" : [4, 5   ], "c": 12 }
+    { "a": { "y" : 6}, "b" : [6      ], "c": 13 }
+    { "a": { "y" : 6}, "b" : [7      ], "c": 14.0 }""",
         ),
     ],
 )
-def test_order_nested_json_reader(tag, data):
-    expected = cudf.read_json(StringIO(data), engine="pandas", lines=True)
-    target = cudf.read_json(
-        StringIO(data), engine="cudf_experimental", lines=True
-    )
+class TestNestedJsonReaderCommon:
+    @pytest.mark.parametrize("chunk_size", [10, 100, 1024, 1024 * 1024])
+    def test_chunked_nested_json_reader(self, tag, data, chunk_size):
+        expected = cudf.read_json(
+            StringIO(data), engine="cudf_experimental", lines=True
+        )
 
-    assert_eq(expected, target, check_dtype=True)
+        source_size = len(data)
+        chunks = []
+        for chunk_start in range(0, source_size, chunk_size):
+            chunks.append(
+                cudf.read_json(
+                    StringIO(data),
+                    engine="cudf_experimental",
+                    byte_range=[chunk_start, chunk_size],
+                    lines=True,
+                )
+            )
+        df = cudf.concat(chunks, ignore_index=True)
+        if tag == "missing" and chunk_size == 10:
+            with pytest.raises(AssertionError):
+                # nested JSON reader infers integer with nulls as float64
+                assert expected.to_arrow().equals(df.to_arrow())
+        else:
+            assert expected.to_arrow().equals(df.to_arrow())
+
+    def test_order_nested_json_reader(self, tag, data):
+        expected = pd.read_json(StringIO(data), lines=True)
+        target = cudf.read_json(
+            StringIO(data), engine="cudf_experimental", lines=True
+        )
+        if tag == "dtype_mismatch":
+            with pytest.raises(AssertionError):
+                # pandas parses integer values in float representation
+                # as integer
+                assert pa.Table.from_pandas(expected).equals(target.to_arrow())
+        else:
+            assert pa.Table.from_pandas(expected).equals(target.to_arrow())
 
 
 def test_json_round_trip_gzip():

From afb3c97597f66be4172ff2db832c5498d1ca13a6 Mon Sep 17 00:00:00 2001
From: Yunsong Wang <yunsongw@nvidia.com>
Date: Wed, 16 Nov 2022 15:00:21 -0500
Subject: [PATCH 178/202] Support nested types as groupby keys in libcudf
 (#11792)

Authors:
  - Yunsong Wang (https://github.com/PointKernel)
  - Ashwin Srinath (https://github.com/shwina)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/11792
---
 .../cudf/detail/groupby/sort_helper.hpp       |   4 +-
 cpp/src/groupby/hash/groupby.cu               |   8 --
 cpp/src/groupby/sort/common_utils.cuh         |  62 +++++++++++
 cpp/src/groupby/sort/functors.hpp             |   2 +
 cpp/src/groupby/sort/group_nunique.cu         | 101 +++++++-----------
 cpp/src/groupby/sort/group_rank_scan.cu       |  57 +++-------
 cpp/src/groupby/sort/sort_helper.cu           |  75 +++----------
 cpp/src/sort/sort_impl.cuh                    |   1 -
 cpp/tests/groupby/keys_tests.cpp              |   6 +-
 cpp/tests/groupby/lists_tests.cu              |  11 +-
 10 files changed, 141 insertions(+), 186 deletions(-)
 create mode 100644 cpp/src/groupby/sort/common_utils.cuh

diff --git a/cpp/include/cudf/detail/groupby/sort_helper.hpp b/cpp/include/cudf/detail/groupby/sort_helper.hpp
index 8705bbd29cb..a5060cd3d36 100644
--- a/cpp/include/cudf/detail/groupby/sort_helper.hpp
+++ b/cpp/include/cudf/detail/groupby/sort_helper.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2021, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -218,8 +218,6 @@ struct sort_groupby_helper {
   column_ptr _unsorted_keys_labels;  ///< Group labels for unsorted _keys
   column_ptr _keys_bitmask_column;   ///< Column representing rows with one or more nulls values
   table_view _keys;                  ///< Input keys to sort by
-  table_view _unflattened_keys;      ///< Input keys, unflattened and possibly nested
-  structs::detail::flattened_table _flattened;  ///< Support datastructures for _keys
 
   index_vector_ptr
     _group_offsets;  ///< Indices into sorted _keys indicating starting index of each groups
diff --git a/cpp/src/groupby/hash/groupby.cu b/cpp/src/groupby/hash/groupby.cu
index 90c869b8c58..8410d499f1a 100644
--- a/cpp/src/groupby/hash/groupby.cu
+++ b/cpp/src/groupby/hash/groupby.cu
@@ -668,14 +668,6 @@ std::pair<std::unique_ptr<table>, std::vector<aggregation_result>> groupby(
   rmm::cuda_stream_view stream,
   rmm::mr::device_memory_resource* mr)
 {
-  auto const has_nested_column =
-    std::any_of(keys.begin(), keys.end(), [](cudf::column_view const& col) {
-      return cudf::is_nested(col.type());
-    });
-  if (has_nested_column and include_null_keys == cudf::null_policy::EXCLUDE) {
-    CUDF_FAIL("Null keys of nested type cannot be excluded.");
-  }
-
   cudf::detail::result_cache cache(requests.size());
 
   std::unique_ptr<table> unique_keys =
diff --git a/cpp/src/groupby/sort/common_utils.cuh b/cpp/src/groupby/sort/common_utils.cuh
new file mode 100644
index 00000000000..fe5d7c325ca
--- /dev/null
+++ b/cpp/src/groupby/sort/common_utils.cuh
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <cudf/types.hpp>
+
+namespace cudf::groupby::detail {
+/**
+ * @brief Functor to compare two rows of a table in given permutation order
+ *
+ * This is useful to identify unique elements in a sorted order table, when the permutation order is
+ * the sorted order of the table.
+ */
+template <typename ComparatorT, typename Iterator>
+struct permuted_row_equality_comparator {
+  /**
+   * @brief Constructs a permuted comparator object which compares two rows of the table in given
+   * permutation order
+   *
+   * @param comparator Equality comparator
+   * @param permutation The permutation map that specifies the effective ordering of the table
+   * compared by `comparator`. Must be the same size as the number of rows of that table
+   */
+  permuted_row_equality_comparator(ComparatorT const& comparator, Iterator const permutation)
+    : _comparator{comparator}, _permutation{permutation}
+  {
+  }
+
+  /**
+   * @brief Returns true if the two rows at the specified indices in the permuted
+   * order are equivalent.
+   *
+   * For example, comparing rows `i` and `j` is equivalent to comparing
+   * rows `permutation[i]` and `permutation[j]` in the original table.
+   *
+   * @param lhs The index of the first row
+   * @param rhs The index of the second row
+   * @returns true if the two specified rows in the permuted order are equivalent
+   */
+  __device__ bool operator()(cudf::size_type lhs, cudf::size_type rhs) const
+  {
+    return _comparator(_permutation[lhs], _permutation[rhs]);
+  };
+
+ private:
+  ComparatorT const _comparator;
+  Iterator const _permutation;
+};
+}  // namespace cudf::groupby::detail
diff --git a/cpp/src/groupby/sort/functors.hpp b/cpp/src/groupby/sort/functors.hpp
index 748e34a583d..bcc190c745b 100644
--- a/cpp/src/groupby/sort/functors.hpp
+++ b/cpp/src/groupby/sort/functors.hpp
@@ -13,6 +13,8 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+#pragma once
+
 #include <cudf/column/column.hpp>
 #include <cudf/column/column_view.hpp>
 #include <cudf/detail/aggregation/result_cache.hpp>
diff --git a/cpp/src/groupby/sort/group_nunique.cu b/cpp/src/groupby/sort/group_nunique.cu
index b719698b6b5..c411e654913 100644
--- a/cpp/src/groupby/sort/group_nunique.cu
+++ b/cpp/src/groupby/sort/group_nunique.cu
@@ -16,7 +16,7 @@
 
 #include <cudf/aggregation.hpp>
 #include <cudf/column/column_factories.hpp>
-#include <cudf/table/row_operators.cuh>
+#include <cudf/table/experimental/row_operators.cuh>
 #include <cudf/types.hpp>
 #include <cudf/utilities/span.hpp>
 
@@ -33,82 +33,45 @@ namespace groupby {
 namespace detail {
 namespace {
 
-template <typename T, typename Nullate>
+template <typename Nullate>
 struct is_unique_iterator_fn {
+  using comparator_type =
+    typename cudf::experimental::row::equality::device_row_comparator<Nullate>;
+
   Nullate nulls;
   column_device_view const v;
-  element_equality_comparator<Nullate> equal;
+  comparator_type equal;
   null_policy null_handling;
   size_type const* group_offsets;
   size_type const* group_labels;
 
   is_unique_iterator_fn(Nullate nulls,
                         column_device_view const& v,
+                        comparator_type const& equal,
                         null_policy null_handling,
                         size_type const* group_offsets,
                         size_type const* group_labels)
     : nulls{nulls},
       v{v},
-      equal{nulls, v, v},
+      equal{equal},
       null_handling{null_handling},
       group_offsets{group_offsets},
       group_labels{group_labels}
   {
   }
 
-  __device__ size_type operator()(size_type i)
+  __device__ size_type operator()(size_type i) const
   {
-    bool is_input_countable =
+    auto const is_input_countable =
       !nulls || (null_handling == null_policy::INCLUDE || v.is_valid_nocheck(i));
-    bool is_unique = is_input_countable &&
-                     (group_offsets[group_labels[i]] == i ||          // first element or
-                      (not equal.template operator()<T>(i, i - 1)));  // new unique value in sorted
+    auto const is_unique =
+      is_input_countable && (group_offsets[group_labels[i]] == i ||  // first element or
+                             (not equal(i, i - 1)));                 // new unique value in sorted
     return static_cast<size_type>(is_unique);
   }
 };
-
-struct nunique_functor {
-  template <typename T>
-  std::enable_if_t<cudf::is_equality_comparable<T, T>(), std::unique_ptr<column>> operator()(
-    column_view const& values,
-    cudf::device_span<size_type const> group_labels,
-    size_type const num_groups,
-    cudf::device_span<size_type const> group_offsets,
-    null_policy null_handling,
-    rmm::cuda_stream_view stream,
-    rmm::mr::device_memory_resource* mr)
-  {
-    auto result = make_numeric_column(
-      data_type(type_to_id<size_type>()), num_groups, mask_state::UNALLOCATED, stream, mr);
-
-    if (num_groups == 0) { return result; }
-
-    auto values_view        = column_device_view::create(values, stream);
-    auto is_unique_iterator = thrust::make_transform_iterator(
-      thrust::make_counting_iterator<size_type>(0),
-      is_unique_iterator_fn<T, nullate::DYNAMIC>{nullate::DYNAMIC{values.has_nulls()},
-                                                 *values_view,
-                                                 null_handling,
-                                                 group_offsets.data(),
-                                                 group_labels.data()});
-    thrust::reduce_by_key(rmm::exec_policy(stream),
-                          group_labels.begin(),
-                          group_labels.end(),
-                          is_unique_iterator,
-                          thrust::make_discard_iterator(),
-                          result->mutable_view().begin<size_type>());
-
-    return result;
-  }
-
-  template <typename T, typename... Args>
-  std::enable_if_t<!cudf::is_equality_comparable<T, T>(), std::unique_ptr<column>> operator()(
-    Args&&...)
-  {
-    CUDF_FAIL("list_view group_nunique not supported yet");
-  }
-};
 }  // namespace
+
 std::unique_ptr<column> group_nunique(column_view const& values,
                                       cudf::device_span<size_type const> group_labels,
                                       size_type const num_groups,
@@ -121,15 +84,33 @@ std::unique_ptr<column> group_nunique(column_view const& values,
   CUDF_EXPECTS(static_cast<size_t>(values.size()) == group_labels.size(),
                "Size of values column should be same as that of group labels");
 
-  return type_dispatcher(values.type(),
-                         nunique_functor{},
-                         values,
-                         group_labels,
-                         num_groups,
-                         group_offsets,
-                         null_handling,
-                         stream,
-                         mr);
+  auto result = make_numeric_column(
+    data_type(type_to_id<size_type>()), num_groups, mask_state::UNALLOCATED, stream, mr);
+
+  if (num_groups == 0) { return result; }
+
+  auto const values_view = table_view{{values}};
+  auto const comparator  = cudf::experimental::row::equality::self_comparator{values_view, stream};
+  auto const d_equal     = comparator.equal_to(
+    cudf::nullate::DYNAMIC{cudf::has_nested_nulls(values_view)}, null_equality::EQUAL);
+
+  auto const d_values_view = column_device_view::create(values, stream);
+  auto const is_unique_iterator =
+    thrust::make_transform_iterator(thrust::counting_iterator<cudf::size_type>(0),
+                                    is_unique_iterator_fn{nullate::DYNAMIC{values.has_nulls()},
+                                                          *d_values_view,
+                                                          d_equal,
+                                                          null_handling,
+                                                          group_offsets.data(),
+                                                          group_labels.data()});
+  thrust::reduce_by_key(rmm::exec_policy(stream),
+                        group_labels.begin(),
+                        group_labels.end(),
+                        is_unique_iterator,
+                        thrust::make_discard_iterator(),
+                        result->mutable_view().begin<size_type>());
+
+  return result;
 }
 
 }  // namespace detail
diff --git a/cpp/src/groupby/sort/group_rank_scan.cu b/cpp/src/groupby/sort/group_rank_scan.cu
index cce84384ef7..149f026ffe6 100644
--- a/cpp/src/groupby/sort/group_rank_scan.cu
+++ b/cpp/src/groupby/sort/group_rank_scan.cu
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+#include "common_utils.cuh"
+
 #include <cudf/column/column.hpp>
 #include <cudf/column/column_factories.hpp>
 #include <cudf/detail/aggregation/aggregation.hpp>
@@ -21,7 +23,7 @@
 #include <cudf/detail/null_mask.hpp>
 #include <cudf/detail/structs/utilities.hpp>
 #include <cudf/detail/utilities/device_operators.cuh>
-#include <cudf/table/row_operators.cuh>
+#include <cudf/table/experimental/row_operators.cuh>
 #include <cudf/utilities/span.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
@@ -39,36 +41,6 @@ namespace groupby {
 namespace detail {
 namespace {
 
-/**
- * @brief Functor to compare two rows of a table in given permutation order
- * This is useful to identify unique elements in a sorted order table, when the permutation order is
- * the sorted order of the table.
- *
- */
-template <typename Iterator>
-struct permuted_comparator {
-  /**
-   * @brief comparator object which compares two rows of the table in given permutation order
-   *
-   * @param device_table Device table to compare
-   * @param permutation The permutation order, integer type column.
-   * @param has_nulls whether the table has nulls
-   */
-  permuted_comparator(table_device_view device_table, Iterator const permutation, bool has_nulls)
-    : comparator(nullate::DYNAMIC{has_nulls}, device_table, device_table, null_equality::EQUAL),
-      permutation(permutation)
-  {
-  }
-  __device__ bool operator()(size_type index1, size_type index2) const
-  {
-    return comparator(permutation[index1], permutation[index2]);
-  };
-
- private:
-  row_equality_comparator<nullate::DYNAMIC> comparator;
-  Iterator const permutation;
-};
-
 /**
  * @brief generate grouped row ranks or dense ranks using a row comparison then scan the results
  *
@@ -99,32 +71,29 @@ std::unique_ptr<column> rank_generator(column_view const& grouped_values,
                                        rmm::cuda_stream_view stream,
                                        rmm::mr::device_memory_resource* mr)
 {
-  auto const flattened = cudf::structs::detail::flatten_nested_columns(
-    table_view{{grouped_values}}, {}, {}, structs::detail::column_nullability::MATCH_INCOMING);
-  auto const d_flat_order = table_device_view::create(flattened, stream);
-  auto sorted_index_order = value_order.begin<size_type>();
-  auto comparator         = permuted_comparator(*d_flat_order, sorted_index_order, has_nulls);
+  auto const comparator =
+    cudf::experimental::row::equality::self_comparator{table_view{{grouped_values}}, stream};
+  auto const d_equal = comparator.equal_to(cudf::nullate::DYNAMIC{has_nulls}, null_equality::EQUAL);
+  auto const permuted_equal =
+    permuted_row_equality_comparator(d_equal, value_order.begin<size_type>());
 
-  auto ranks         = make_fixed_width_column(data_type{type_to_id<size_type>()},
-                                       flattened.flattened_columns().num_rows(),
-                                       mask_state::UNALLOCATED,
-                                       stream,
-                                       mr);
+  auto ranks = make_fixed_width_column(
+    data_type{type_to_id<size_type>()}, grouped_values.size(), mask_state::UNALLOCATED, stream, mr);
   auto mutable_ranks = ranks->mutable_view();
 
   auto unique_identifier = [labels  = group_labels.begin(),
                             offsets = group_offsets.begin(),
-                            comparator,
+                            permuted_equal,
                             resolver] __device__(size_type row_index) {
     auto const group_start = offsets[labels[row_index]];
     if constexpr (forward) {
       // First value of equal values is 1.
-      return resolver(row_index == group_start || !comparator(row_index, row_index - 1),
+      return resolver(row_index == group_start || !permuted_equal(row_index, row_index - 1),
                       row_index - group_start);
     } else {
       auto const group_end = offsets[labels[row_index] + 1];
       // Last value of equal values is 1.
-      return resolver(row_index + 1 == group_end || !comparator(row_index, row_index + 1),
+      return resolver(row_index + 1 == group_end || !permuted_equal(row_index, row_index + 1),
                       row_index - group_start);
     }
   };
diff --git a/cpp/src/groupby/sort/sort_helper.cu b/cpp/src/groupby/sort/sort_helper.cu
index 53ab65e9be7..2bf63cb42fc 100644
--- a/cpp/src/groupby/sort/sort_helper.cu
+++ b/cpp/src/groupby/sort/sort_helper.cu
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+#include "common_utils.cuh"
+
 #include <cudf/column/column_factories.hpp>
 #include <cudf/copying.hpp>
 #include <cudf/detail/copy.hpp>
@@ -26,7 +28,7 @@
 #include <cudf/detail/sorting.hpp>
 #include <cudf/detail/structs/utilities.hpp>
 #include <cudf/strings/string_view.hpp>
-#include <cudf/table/row_operators.cuh>
+#include <cudf/table/experimental/row_operators.cuh>
 #include <cudf/table/table_device_view.cuh>
 #include <cudf/utilities/traits.hpp>
 
@@ -44,48 +46,6 @@
 #include <numeric>
 #include <tuple>
 
-namespace {
-/**
- * @brief Compares two `table` rows for equality as if the table were
- * ordered according to a specified permutation map.
- */
-struct permuted_row_equality_comparator {
-  cudf::row_equality_comparator<cudf::nullate::DYNAMIC> _comparator;
-  cudf::size_type const* _map;
-
-  /**
-   * @brief Construct a permuted_row_equality_comparator.
-   *
-   * @param t The `table` whose rows will be compared
-   * @param map The permutation map that specifies the effective ordering of
-   * `t`. Must be the same size as `t.num_rows()`
-   */
-  permuted_row_equality_comparator(cudf::table_device_view const& t,
-                                   cudf::size_type const* map,
-                                   bool nullable = true)
-    : _comparator(cudf::nullate::DYNAMIC{nullable}, t, t, cudf::null_equality::EQUAL), _map{map}
-  {
-  }
-
-  /**
-   * @brief Returns true if the two rows at the specified indices in the permuted
-   * order are equivalent.
-   *
-   * For example, comparing rows `i` and `j` is
-   * equivalent to comparing rows `map[i]` and `map[j]` in the original table.
-   *
-   * @param lhs The index of the first row
-   * @param rhs The index of the second row
-   * @returns true if the two specified rows in the permuted order are equivalent
-   */
-  __device__ inline bool operator()(cudf::size_type lhs, cudf::size_type rhs)
-  {
-    return _comparator(_map[lhs], _map[rhs]);
-  }
-};
-
-}  // namespace
-
 namespace cudf {
 namespace groupby {
 namespace detail {
@@ -94,19 +54,13 @@ namespace sort {
 sort_groupby_helper::sort_groupby_helper(table_view const& keys,
                                          null_policy include_null_keys,
                                          sorted keys_pre_sorted)
-  : _unflattened_keys(keys),
+  : _keys(keys),
     _num_keys(-1),
     _keys_pre_sorted(keys_pre_sorted),
     _include_null_keys(include_null_keys)
 {
   using namespace cudf::structs::detail;
 
-  _flattened                 = flatten_nested_columns(keys, {}, {}, column_nullability::FORCE);
-  _keys                      = _flattened;
-  auto is_supported_key_type = [](auto col) { return cudf::is_equality_comparable(col.type()); };
-  CUDF_EXPECTS(std::all_of(_keys.begin(), _keys.end(), is_supported_key_type),
-               "Unsupported groupby key type does not support equality comparison");
-
   // Cannot depend on caller's sorting if the column contains nulls,
   // and null values are to be excluded.
   // Re-sort the data, to filter out nulls more easily.
@@ -191,16 +145,17 @@ sort_groupby_helper::index_vector const& sort_groupby_helper::group_offsets(
 
   _group_offsets = std::make_unique<index_vector>(num_keys(stream) + 1, stream);
 
-  auto device_input_table = table_device_view::create(_keys, stream);
-  auto sorted_order       = key_sort_order(stream).data<size_type>();
+  auto const comparator  = cudf::experimental::row::equality::self_comparator{_keys, stream};
+  auto const d_key_equal = comparator.equal_to(
+    cudf::nullate::DYNAMIC{cudf::has_nested_nulls(_keys)}, null_equality::EQUAL);
+  auto const sorted_order = key_sort_order(stream).data<size_type>();
   decltype(_group_offsets->begin()) result_end;
 
-  result_end = thrust::unique_copy(
-    rmm::exec_policy(stream),
-    thrust::make_counting_iterator<size_type>(0),
-    thrust::make_counting_iterator<size_type>(num_keys(stream)),
-    _group_offsets->begin(),
-    permuted_row_equality_comparator(*device_input_table, sorted_order, has_nulls(_keys)));
+  result_end = thrust::unique_copy(rmm::exec_policy(stream),
+                                   thrust::counting_iterator<size_type>(0),
+                                   thrust::counting_iterator<size_type>(num_keys(stream)),
+                                   _group_offsets->begin(),
+                                   permuted_row_equality_comparator(d_key_equal, sorted_order));
 
   size_type num_groups = thrust::distance(_group_offsets->begin(), result_end);
   _group_offsets->set_element(num_groups, num_keys(stream), stream);
@@ -315,7 +270,7 @@ std::unique_ptr<table> sort_groupby_helper::unique_keys(rmm::cuda_stream_view st
   auto gather_map_it = thrust::make_transform_iterator(
     group_offsets(stream).begin(), [idx_data] __device__(size_type i) { return idx_data[i]; });
 
-  return cudf::detail::gather(_unflattened_keys,
+  return cudf::detail::gather(_keys,
                               gather_map_it,
                               gather_map_it + num_groups(stream),
                               out_of_bounds_policy::DONT_CHECK,
@@ -326,7 +281,7 @@ std::unique_ptr<table> sort_groupby_helper::unique_keys(rmm::cuda_stream_view st
 std::unique_ptr<table> sort_groupby_helper::sorted_keys(rmm::cuda_stream_view stream,
                                                         rmm::mr::device_memory_resource* mr)
 {
-  return cudf::detail::gather(_unflattened_keys,
+  return cudf::detail::gather(_keys,
                               key_sort_order(stream),
                               cudf::out_of_bounds_policy::DONT_CHECK,
                               cudf::detail::negative_index_policy::NOT_ALLOWED,
diff --git a/cpp/src/sort/sort_impl.cuh b/cpp/src/sort/sort_impl.cuh
index 97fc8ac14cb..fc024b42616 100644
--- a/cpp/src/sort/sort_impl.cuh
+++ b/cpp/src/sort/sort_impl.cuh
@@ -21,7 +21,6 @@
 #include <cudf/detail/structs/utilities.hpp>
 #include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/table/experimental/row_operators.cuh>
-#include <cudf/table/row_operators.cuh>
 #include <cudf/table/table_device_view.cuh>
 #include <cudf/utilities/error.hpp>
 #include <cudf/utilities/traits.hpp>
diff --git a/cpp/tests/groupby/keys_tests.cpp b/cpp/tests/groupby/keys_tests.cpp
index d2f2f233953..efd0f52114e 100644
--- a/cpp/tests/groupby/keys_tests.cpp
+++ b/cpp/tests/groupby/keys_tests.cpp
@@ -293,8 +293,7 @@ TYPED_TEST(groupby_keys_test, structs)
   auto expect_vals = FWCW<R>{6, 1, 8, 7};
 
   auto agg = cudf::make_argmax_aggregation<groupby_aggregation>();
-  EXPECT_THROW(test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg)),
-               cudf::logic_error);
+  test_single_agg(keys, vals, expect_keys, expect_vals, std::move(agg));
 }
 
 template <typename T>
@@ -313,8 +312,7 @@ TYPED_TEST(groupby_keys_test, lists)
   // clang-format on
 
   auto agg = cudf::make_sum_aggregation<groupby_aggregation>();
-  EXPECT_THROW(test_single_agg(keys, values, expected_keys, expected_values, std::move(agg)),
-               cudf::logic_error);
+  test_single_agg(keys, values, expected_keys, expected_values, std::move(agg));
 }
 
 struct groupby_string_keys_test : public cudf::test::BaseFixture {
diff --git a/cpp/tests/groupby/lists_tests.cu b/cpp/tests/groupby/lists_tests.cu
index e4118318792..226758fe81a 100644
--- a/cpp/tests/groupby/lists_tests.cu
+++ b/cpp/tests/groupby/lists_tests.cu
@@ -149,8 +149,7 @@ void test_sum_agg(column_view const& keys,
                   column_view const& expected_keys,
                   column_view const& expected_values)
 {
-  EXPECT_THROW(test_sort_based_sum_agg(keys, values, expected_keys, expected_values),
-               cudf::logic_error);
+  test_sort_based_sum_agg(keys, values, expected_keys, expected_values);
   test_hash_based_sum_agg(keys, values, expected_keys, expected_values);
 }
 }  // namespace
@@ -189,8 +188,8 @@ TYPED_TEST(groupby_lists_test, lists_with_nulls)
   auto keys   = lcw<TypeParam> { {{1,1}, {2,2}, {3,3}, {1,1}, {2,2}}, nulls_at({1,2,4})};
   auto values = fwcw<int32_t>  {     0,     1,     2,     3,     4 };
 
-  auto expected_keys   = lcw<TypeParam> { {{1,1}, {null,null}}, null_at(1)};
-  auto expected_values = fwcw<R>        {     3,           7 };
+  auto expected_keys   = lcw<TypeParam> { {{null,null}, {1,1}}, null_at(0)};
+  auto expected_values = fwcw<R>        {           7,     3 };
   // clang-format on
 
   test_sum_agg(keys, values, expected_keys, expected_values);
@@ -207,8 +206,8 @@ TYPED_TEST(groupby_lists_test, lists_with_null_elements)
   auto values = fwcw<int32_t>{1, 2, 4, 5};
 
   auto expected_keys = lcw<TypeParam>{
-    {lcw<TypeParam>{{{1, 2, 3}, {}, {4, 5}, {}, {6, 0}}, nulls_at({1, 3})}, {}}, null_at(1)};
-  auto expected_values = fwcw<R>{3, 9};
+    {{}, lcw<TypeParam>{{{1, 2, 3}, {}, {4, 5}, {}, {6, 0}}, nulls_at({1, 3})}}, null_at(0)};
+  auto expected_values = fwcw<R>{9, 3};
 
   test_sum_agg(keys, values, expected_keys, expected_values);
 }

From 95a348b803eebb58d5f0e8abacba499e5033077d Mon Sep 17 00:00:00 2001
From: "Mads R. B. Kristensen" <madsbk@gmail.com>
Date: Wed, 16 Nov 2022 22:40:57 +0100
Subject: [PATCH 179/202] Spilling to host memory (#12106)

This PR implements spilling of device memory to host memory, based on #11553.

Spilling can be enabled in two ways (it is disabled by default):
  - setting the environment variable `CUDF_SPILL=on`, or
  - setting the `spill` option in `cudf` by doing `cudf.set_option("spill", True)`.

Additional parameters are:
  - `CUDF_SPILL_ON_DEMAND=ON` / `cudf.set_option("spill_on_demand", True)`, which registers an RMM out-of-memory error handler that spills buffers in order to free up memory.
  - `CUDF_SPILL_DEVICE_LIMIT=...` / `cudf.set_option("spill_device_limit", ...)`, which sets a device memory limit in bytes.

I have limited the scope of this PR. In a follow-up PR, I will port the statistics, logging, and partial unspill from #11553.

### Design

Spilling consists of two components:

- A new buffer sub-class, `SpillableBuffer`, that implements moving its data between device and host memory in-place.

- A spill manager that tracks all instances of `SpillableBuffer` and spills them on demand.

A global spill manager is used throughout cudf when spilling is enabled, which makes `as_buffer()` return `SpillableBuffer` instead of the default `Buffer` instances.


#### Challenges

Accessing `Buffer.ptr` returns the device memory pointer of the buffer. This is unproblematic in the case of `Buffer`, but what happens when accessing `SpillableBuffer.ptr`, which might have spilled its device memory? In this case, `SpillableBuffer` needs to unspill the memory before returning its device memory pointer. Furthermore, while this device memory pointer is being used (or could be used), `SpillableBuffer` cannot spill its memory back to host memory because doing so would invalidate the device pointer.

To address this, we mark the `SpillableBuffer` as unspillable; we say that the buffer has been _exposed_. This can be either permanent, if the device pointer is exposed to external projects, or temporary, while `libcudf` accesses the device memory.

`SpillableBuffer.get_ptr()` returns the device pointer of the buffer memory just like `.ptr`, but if given an instance of `SpillLock`, the buffer is only unspillable as long as the instance of `SpillLock` is alive.

For convenience, one can use the decorator/context `with_spill_lock` to associate a `SpillLock` with a lifetime bound to the context automatically.


### Overhead

When spilling is disabled, the overhead of this PR comes from the decorator `with_spill_lock`. However, this overhead is small (see https://gist.github.com/madsbk/da6520e7583cf5d728a1b5a1b09200f3):
```
Micro benchmark on my local workstation:
  spilling off:
    raw:                    0.06371338899771217 us
    with-spill-lock:        1.0796624180002254 us
  spilling on:
    raw:                    0.05873749500096892 us
    with-spill-lock:        1.2184517139976379 us
```

##

Authors:
  - Mads R. B. Kristensen (https://github.com/madsbk)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Vyas Ramasubramani (https://github.com/vyasr)
  - AJ Schmidt (https://github.com/ajschmidt8)

URL: https://github.com/rapidsai/cudf/pull/12106
---
 ci/gpu/build.sh                               |   4 +
 .../source/developer_guide/library_design.md  |  33 +-
 python/cudf/cudf/_lib/binaryop.pyx            |   3 +
 python/cudf/cudf/_lib/column.pxd              |   6 +-
 python/cudf/cudf/_lib/column.pyi              |   2 -
 python/cudf/cudf/_lib/column.pyx              | 113 ++++-
 python/cudf/cudf/_lib/copying.pyx             |  22 +-
 python/cudf/cudf/_lib/datetime.pyx            |  12 +
 python/cudf/cudf/_lib/filling.pyx             |   6 +
 python/cudf/cudf/_lib/groupby.pyx             |   7 +-
 python/cudf/cudf/_lib/hash.pyx                |   4 +
 python/cudf/cudf/_lib/join.pyx                |   9 +-
 python/cudf/cudf/_lib/labeling.pyx            |   3 +
 python/cudf/cudf/_lib/lists.pyx               |  13 +
 python/cudf/cudf/_lib/null_mask.pyx           |  11 +-
 .../cudf/cudf/_lib/nvtext/edit_distance.pyx   |   4 +
 .../cudf/cudf/_lib/nvtext/generate_ngrams.pyx |   6 +-
 .../cudf/cudf/_lib/nvtext/ngrams_tokenize.pyx |   5 +-
 python/cudf/cudf/_lib/nvtext/normalize.pyx    |   6 +-
 python/cudf/cudf/_lib/nvtext/replace.pyx      |   6 +-
 python/cudf/cudf/_lib/nvtext/stemmer.pyx      |   7 +-
 .../cudf/_lib/nvtext/subword_tokenize.pyx     |   4 +
 python/cudf/cudf/_lib/nvtext/tokenize.pyx     |   8 +
 python/cudf/cudf/_lib/partitioning.pyx        |   3 +
 python/cudf/cudf/_lib/quantiles.pyx           |   3 +
 python/cudf/cudf/_lib/reduce.pyx              |   4 +
 python/cudf/cudf/_lib/replace.pyx             |  12 +-
 python/cudf/cudf/_lib/reshape.pyx             |   4 +
 python/cudf/cudf/_lib/rolling.pyx             |   3 +
 python/cudf/cudf/_lib/round.pyx               |   5 +-
 python/cudf/cudf/_lib/search.pyx              |   4 +
 python/cudf/cudf/_lib/sort.pyx                |   6 +
 python/cudf/cudf/_lib/stream_compaction.pyx   |   6 +
 python/cudf/cudf/_lib/strings/attributes.pyx  |   7 +-
 python/cudf/cudf/_lib/strings/capitalize.pyx  |   7 +-
 python/cudf/cudf/_lib/strings/case.pyx        |   7 +-
 python/cudf/cudf/_lib/strings/char_types.pyx  |  14 +-
 python/cudf/cudf/_lib/strings/combine.pyx     |   6 +
 python/cudf/cudf/_lib/strings/contains.pyx    |   7 +
 .../strings/convert/convert_fixed_point.pyx   |   5 +
 .../_lib/strings/convert/convert_floats.pyx   |   3 +
 .../_lib/strings/convert/convert_integers.pyx |   3 +
 .../_lib/strings/convert/convert_lists.pyx    |   5 +-
 .../_lib/strings/convert/convert_urls.pyx     |   4 +
 python/cudf/cudf/_lib/strings/extract.pyx     |   3 +
 python/cudf/cudf/_lib/strings/find.pyx        |  12 +-
 .../cudf/cudf/_lib/strings/find_multiple.pyx  |   5 +-
 python/cudf/cudf/_lib/strings/findall.pyx     |   3 +
 python/cudf/cudf/_lib/strings/json.pyx        |   3 +
 python/cudf/cudf/_lib/strings/padding.pyx     |  10 +-
 python/cudf/cudf/_lib/strings/repeat.pyx      |   6 +-
 python/cudf/cudf/_lib/strings/replace.pyx     |   6 +
 python/cudf/cudf/_lib/strings/replace_re.pyx  |   7 +-
 .../cudf/_lib/strings/split/partition.pyx     |   4 +
 python/cudf/cudf/_lib/strings/split/split.pyx |  10 +
 python/cudf/cudf/_lib/strings/strip.pyx       |   5 +
 python/cudf/cudf/_lib/strings/substring.pyx   |  14 +-
 python/cudf/cudf/_lib/strings/translate.pyx   |   6 +-
 python/cudf/cudf/_lib/strings/wrap.pyx        |   5 +-
 python/cudf/cudf/_lib/transform.pyx           |  18 +-
 python/cudf/cudf/_lib/transpose.pyx           |   6 +-
 python/cudf/cudf/_lib/unary.pyx               |   7 +
 python/cudf/cudf/core/abc.py                  |   8 +-
 python/cudf/cudf/core/buffer/__init__.py      |   7 +-
 python/cudf/cudf/core/buffer/spill_manager.py | 297 +++++++++++
 .../cudf/cudf/core/buffer/spillable_buffer.py | 474 +++++++++++++++++
 python/cudf/cudf/core/buffer/utils.py         |  71 ++-
 python/cudf/cudf/core/column/column.py        |   4 +-
 python/cudf/cudf/core/df_protocol.py          |  22 +-
 python/cudf/cudf/options.py                   |  71 +++
 python/cudf/cudf/testing/_utils.py            |   8 +-
 python/cudf/cudf/tests/conftest.py            |  18 +
 python/cudf/cudf/tests/pytest.ini             |   5 +
 python/cudf/cudf/tests/test_binops.py         |   2 +
 python/cudf/cudf/tests/test_buffer.py         |  14 +-
 .../cudf/tests/test_cuda_array_interface.py   |   8 +
 python/cudf/cudf/tests/test_dataframe.py      |   2 +
 python/cudf/cudf/tests/test_groupby.py        |   2 +-
 python/cudf/cudf/tests/test_onehot.py         |   2 +
 python/cudf/cudf/tests/test_pickling.py       |  10 +-
 python/cudf/cudf/tests/test_reshape.py        |   2 +
 python/cudf/cudf/tests/test_spilling.py       | 477 ++++++++++++++++++
 .../strings_udf/_lib/cudf_jit_udf.pyx         |   2 +-
 83 files changed, 1953 insertions(+), 95 deletions(-)
 create mode 100644 python/cudf/cudf/core/buffer/spill_manager.py
 create mode 100644 python/cudf/cudf/core/buffer/spillable_buffer.py
 create mode 100644 python/cudf/cudf/tests/pytest.ini
 create mode 100644 python/cudf/cudf/tests/test_spilling.py

diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh
index 500c3bdbcc5..9e0dd884060 100755
--- a/ci/gpu/build.sh
+++ b/ci/gpu/build.sh
@@ -282,6 +282,10 @@ conda list
 gpuci_logger "Python py.test for cuDF"
 py.test -n 8 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" --ignore="$WORKSPACE/python/cudf/cudf/benchmarks" --junitxml="$WORKSPACE/junit-cudf.xml" -v --cov-config="$WORKSPACE/python/cudf/.coveragerc" --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-coverage.xml" --cov-report term --dist=loadscope tests
 
+gpuci_logger "Python py.tests for cuDF with spilling (CUDF_SPILL_DEVICE_LIMIT=1)"
+# Due to time concerns, we only run tests marked "spilling"
+CUDF_SPILL=on CUDF_SPILL_DEVICE_LIMIT=1 py.test -n 8 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" --ignore="$WORKSPACE/python/cudf/cudf/benchmarks" -v --cov-config="$WORKSPACE/python/cudf/.coveragerc" --cov-append --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-coverage.xml" --cov-report term --dist=loadscope -m spilling tests
+
 cd "$WORKSPACE/python/dask_cudf"
 gpuci_logger "Python py.test for dask-cudf"
 py.test -n 8 --cache-clear --basetemp="$WORKSPACE/dask-cudf-cuda-tmp" --junitxml="$WORKSPACE/junit-dask-cudf.xml" -v --cov-config=.coveragerc --cov=dask_cudf --cov-report=xml:"$WORKSPACE/python/dask_cudf/dask-cudf-coverage.xml" --cov-report term dask_cudf
diff --git a/docs/cudf/source/developer_guide/library_design.md b/docs/cudf/source/developer_guide/library_design.md
index 2f0fb5d86fc..ac3ace20ba3 100644
--- a/docs/cudf/source/developer_guide/library_design.md
+++ b/docs/cudf/source/developer_guide/library_design.md
@@ -203,7 +203,6 @@ For instance, all numerical types (floats and ints of different widths) are all
 
 ### Buffer
 
-
 `Column`s are in turn composed of one or more `Buffer`s.
 A `Buffer` represents a single, contiguous, device memory allocation owned by another object.
 A `Buffer` constructed from a preexisting device memory allocation (such as a CuPy array) will view that memory.
@@ -212,6 +211,38 @@ Conversely, when constructed from a host object,
 The data is then copied from the host object into the newly allocated device memory.
 You can read more about [device memory allocation with RMM here](https://github.com/rapidsai/rmm).
 
+
+### Spilling to host memory
+
+Setting the environment variable `CUDF_SPILL=on` enables automatic spilling (and "unspilling") of buffers from
+device to host to enable out-of-memory computation, i.e., computing on objects that occupy more memory than is
+available on the GPU.
+
+
+Spilling can be enabled in two ways (it is disabled by default):
+  - setting the environment variable `CUDF_SPILL=on`, or
+  - setting the `spill` option in `cudf` by doing `cudf.set_option("spill", True)`.
+
+Additional parameters are:
+  - `CUDF_SPILL_ON_DEMAND=ON` / `cudf.set_option("spill_on_demand", True)`, which registers an RMM out-of-memory error handler that spills buffers in order to free up memory.
+  - `CUDF_SPILL_DEVICE_LIMIT=...` / `cudf.set_option("spill_device_limit", ...)`, which sets a device memory limit in bytes.
+
+
+#### Design
+
+Spilling consists of two components:
+  - A new buffer sub-class, `SpillableBuffer`, that implements moving of its data between host and device memory in-place.
+  - A spill manager that tracks all instances of `SpillableBuffer` and spills them on demand.
+A global spill manager is used throughout cudf when spilling is enabled, which makes `as_buffer()` return `SpillableBuffer` instead of the default `Buffer` instances.
+
+Accessing `Buffer.ptr`, we get the device memory pointer of the buffer. This is unproblematic in the case of `Buffer`, but what happens when accessing `SpillableBuffer.ptr`, which might have spilled its device memory? In this case, `SpillableBuffer` needs to unspill the memory before returning its device memory pointer. Furthermore, while this device memory pointer is being used (or could be used), `SpillableBuffer` cannot spill its memory back to host memory because doing so would invalidate the device pointer.
+
+To address this, we mark the `SpillableBuffer` as unspillable; we say that the buffer has been _exposed_. This can either be permanent if the device pointer is exposed to external projects or temporary while `libcudf` accesses the device memory.
+
+`SpillableBuffer.get_ptr()` returns the device pointer of the buffer memory just like `.ptr`, but if given an instance of `SpillLock`, the buffer is only unspillable as long as the instance of `SpillLock` is alive.
+
+For convenience, one can use the decorator/context manager `acquire_spill_lock` to associate a `SpillLock` with a lifetime bound to the context automatically.
+
 ## The Cython layer
 
 The lowest level of cuDF is its interaction with `libcudf` via Cython.
diff --git a/python/cudf/cudf/_lib/binaryop.pyx b/python/cudf/cudf/_lib/binaryop.pyx
index f2fec24b05a..6212347b5b1 100644
--- a/python/cudf/cudf/_lib/binaryop.pyx
+++ b/python/cudf/cudf/_lib/binaryop.pyx
@@ -22,6 +22,7 @@ from cudf._lib.cpp.types cimport data_type, type_id
 from cudf._lib.types cimport dtype_to_data_type, underlying_type_t_type_id
 
 from cudf.api.types import is_scalar
+from cudf.core.buffer import acquire_spill_lock
 
 cimport cudf._lib.cpp.binaryop as cpp_binaryop
 from cudf._lib.cpp.binaryop cimport binary_operator
@@ -156,6 +157,7 @@ cdef binaryop_s_v(DeviceScalar lhs, Column rhs,
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def binaryop(lhs, rhs, op, dtype):
     """
     Dispatches a binary op call to the appropriate libcudf function:
@@ -200,6 +202,7 @@ def binaryop(lhs, rhs, op, dtype):
     return result
 
 
+@acquire_spill_lock()
 def binaryop_udf(Column lhs, Column rhs, udf_ptx, dtype):
     """
     Apply a user-defined binary operator (a UDF) defined in `udf_ptx` on
diff --git a/python/cudf/cudf/_lib/column.pxd b/python/cudf/cudf/_lib/column.pxd
index 2df958466c6..f8f851bfe0f 100644
--- a/python/cudf/cudf/_lib/column.pxd
+++ b/python/cudf/cudf/_lib/column.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
@@ -28,7 +28,9 @@ cdef class Column:
     cdef mutable_column_view mutable_view(self) except *
 
     @staticmethod
-    cdef Column from_unique_ptr(unique_ptr[column] c_col)
+    cdef Column from_unique_ptr(
+        unique_ptr[column] c_col, bint data_ptr_exposed=*
+    )
 
     @staticmethod
     cdef Column from_column_view(column_view, object)
diff --git a/python/cudf/cudf/_lib/column.pyi b/python/cudf/cudf/_lib/column.pyi
index c38c560b982..612f3cdf95a 100644
--- a/python/cudf/cudf/_lib/column.pyi
+++ b/python/cudf/cudf/_lib/column.pyi
@@ -42,8 +42,6 @@ class Column:
     @property
     def base_data(self) -> Optional[Buffer]: ...
     @property
-    def base_data_ptr(self) -> int: ...
-    @property
     def data(self) -> Optional[Buffer]: ...
     @property
     def data_ptr(self) -> int: ...
diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx
index 918d786fb83..ec7d2570708 100644
--- a/python/cudf/cudf/_lib/column.pyx
+++ b/python/cudf/cudf/_lib/column.pyx
@@ -8,7 +8,14 @@ import rmm
 import cudf
 import cudf._lib as libcudf
 from cudf.api.types import is_categorical_dtype
-from cudf.core.buffer import Buffer, as_buffer
+from cudf.core.buffer import (
+    Buffer,
+    SpillableBuffer,
+    SpillLock,
+    acquire_spill_lock,
+    as_buffer,
+    get_spill_lock,
+)
 
 from cpython.buffer cimport PyObject_CheckBuffer
 from libc.stdint cimport uintptr_t
@@ -81,13 +88,6 @@ cdef class Column:
     def base_data(self):
         return self._base_data
 
-    @property
-    def base_data_ptr(self):
-        if self.base_data is None:
-            return 0
-        else:
-            return self.base_data.ptr
-
     @property
     def data(self):
         if self.base_data is None:
@@ -314,7 +314,8 @@ cdef class Column:
             return other_col
 
     cdef libcudf_types.size_type compute_null_count(self) except? 0:
-        return self._view(libcudf_types.UNKNOWN_NULL_COUNT).null_count()
+        with acquire_spill_lock():
+            return self._view(libcudf_types.UNKNOWN_NULL_COUNT).null_count()
 
     cdef mutable_column_view mutable_view(self) except *:
         if is_categorical_dtype(self.dtype):
@@ -328,7 +329,14 @@ cdef class Column:
         cdef vector[mutable_column_view] children
         cdef void* data
 
-        data = <void*><uintptr_t>(col.base_data_ptr)
+        if col.base_data is None:
+            data = NULL
+        elif isinstance(col.base_data, SpillableBuffer):
+            data = <void*><uintptr_t>(col.base_data).get_ptr(
+                spill_lock=get_spill_lock()
+            )
+        else:
+            data = <void*><uintptr_t>(col.base_data.ptr)
 
         cdef Column child_column
         if col.base_children:
@@ -381,7 +389,14 @@ cdef class Column:
         cdef vector[column_view] children
         cdef void* data
 
-        data = <void*><uintptr_t>(col.base_data_ptr)
+        if col.base_data is None:
+            data = NULL
+        elif isinstance(col.base_data, SpillableBuffer):
+            data = <void*><uintptr_t>(col.base_data).get_ptr(
+                spill_lock=get_spill_lock()
+            )
+        else:
+            data = <void*><uintptr_t>(col.base_data.ptr)
 
         cdef Column child_column
         if col.base_children:
@@ -406,7 +421,16 @@ cdef class Column:
             children)
 
     @staticmethod
-    cdef Column from_unique_ptr(unique_ptr[column] c_col):
+    cdef Column from_unique_ptr(
+        unique_ptr[column] c_col, bint data_ptr_exposed=False
+    ):
+        """Create a Column from a column
+
+        Typically, this is called on the result of a libcudf operation.
+        If the data of the libcudf result has been exposed, set
+        `data_ptr_exposed=True` to expose the memory of the returned Column
+        as well.
+        """
         cdef column_view view = c_col.get()[0].view()
         cdef libcudf_types.type_id tid = view.type().id()
         cdef libcudf_types.data_type c_dtype
@@ -431,20 +455,30 @@ cdef class Column:
         # After call to release(), c_col is unusable
         cdef column_contents contents = move(c_col.get()[0].release())
 
-        data = DeviceBuffer.c_from_unique_ptr(move(contents.data))
-        data = as_buffer(data)
+        data = as_buffer(
+            DeviceBuffer.c_from_unique_ptr(move(contents.data)),
+            exposed=data_ptr_exposed
+        )
 
         if null_count > 0:
-            mask = DeviceBuffer.c_from_unique_ptr(move(contents.null_mask))
-            mask = as_buffer(mask)
+            mask = as_buffer(
+                DeviceBuffer.c_from_unique_ptr(move(contents.null_mask)),
+                exposed=data_ptr_exposed
+            )
         else:
             mask = None
 
         cdef vector[unique_ptr[column]] c_children = move(contents.children)
-        children = ()
+        children = []
         if c_children.size() != 0:
-            children = tuple(Column.from_unique_ptr(move(c_children[i]))
-                             for i in range(c_children.size()))
+            # Because of a bug in Cython, we cannot set the optional
+            # `data_ptr_exposed` argument within a comprehension.
+            for i in range(c_children.size()):
+                child = Column.from_unique_ptr(
+                    move(c_children[i]),
+                    data_ptr_exposed=data_ptr_exposed
+                )
+                children.append(child)
 
         return cudf.core.column.build_column(
             data,
@@ -452,7 +486,7 @@ cdef class Column:
             mask=mask,
             size=size,
             null_count=null_count,
-            children=children
+            children=tuple(children)
         )
 
     @staticmethod
@@ -474,6 +508,7 @@ cdef class Column:
         size = cv.size()
         offset = cv.offset()
         dtype = dtype_from_column_view(cv)
+        dtype_itemsize = getattr(dtype, "itemsize", 1)
 
         data_ptr = <uintptr_t>(cv.head[void]())
         data = None
@@ -484,19 +519,46 @@ cdef class Column:
             data_owner = owner.base_data
             mask_owner = mask_owner.base_mask
             base_size = owner.base_size
-
+        base_nbytes = base_size * dtype_itemsize
         if data_ptr:
             if data_owner is None:
                 data = as_buffer(
                     rmm.DeviceBuffer(ptr=data_ptr,
-                                     size=(size+offset) * dtype.itemsize)
+                                     size=(size+offset) * dtype_itemsize)
                 )
+            elif (
+                # This is an optimization of the most common case where
+                # from_column_view creates a "view" that is identical to
+                # the owner.
+                column_owner and
+                isinstance(data_owner, SpillableBuffer) and
+                # We check that `data_owner` is spill locked (not spillable)
+                # and that its pointer is the same as `data_ptr` _without_
+                # exposing the buffer permanently (calling get_ptr with a
+                # dummy SpillLock).
+                not data_owner.spillable and
+                data_owner.get_ptr(spill_lock=SpillLock()) == data_ptr and
+                data_owner.size == base_nbytes
+            ):
+                data = data_owner
             else:
+                # At this point we don't know the relationship between data_ptr
+                # and data_owner thus we mark both of them exposed.
+                # TODO: try to discover their relationship and create a
+                #       SpillableBufferSlice instead.
                 data = as_buffer(
                     data=data_ptr,
-                    size=(base_size) * dtype.itemsize,
-                    owner=data_owner
+                    size=base_nbytes,
+                    owner=data_owner,
+                    exposed=True,
                 )
+                if isinstance(data_owner, SpillableBuffer):
+                    if data_owner.is_spilled:
+                        raise ValueError(
+                            f"{data_owner} is spilled, which invalidates "
+                            f"the exposed data_ptr ({hex(data_ptr)})"
+                        )
+                    data_owner.ptr  # accessing the pointer marks it exposed.
         else:
             data = as_buffer(
                 rmm.DeviceBuffer(ptr=data_ptr, size=0)
@@ -538,7 +600,8 @@ cdef class Column:
                 mask = as_buffer(
                     data=mask_ptr,
                     size=bitmask_allocation_size_bytes(base_size),
-                    owner=mask_owner
+                    owner=mask_owner,
+                    exposed=True
                 )
 
         if cv.has_nulls():
diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx
index 1de91e6a3e9..9f0b294b10c 100644
--- a/python/cudf/cudf/_lib/copying.pyx
+++ b/python/cudf/cudf/_lib/copying.pyx
@@ -12,7 +12,7 @@ from libcpp.vector cimport vector
 from rmm._lib.device_buffer cimport DeviceBuffer
 
 import cudf
-from cudf.core.buffer import Buffer, as_buffer
+from cudf.core.buffer import Buffer, acquire_spill_lock, as_buffer
 
 from cudf._lib.column cimport Column
 
@@ -64,6 +64,7 @@ def _gather_map_is_valid(
     return gm_min >= -nrows and gm_max < nrows
 
 
+@acquire_spill_lock()
 def copy_column(Column input_column):
     """
     Deep copies a column
@@ -85,6 +86,7 @@ def copy_column(Column input_column):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def _copy_range_in_place(Column input_column,
                          Column target_column,
                          size_type input_begin,
@@ -132,6 +134,7 @@ def _copy_range(Column input_column,
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def copy_range(Column source_column,
                Column target_column,
                size_type source_begin,
@@ -165,6 +168,7 @@ def copy_range(Column source_column,
                            source_begin, source_end, target_begin)
 
 
+@acquire_spill_lock()
 def gather(
     list columns,
     Column gather_map,
@@ -232,6 +236,7 @@ cdef scatter_column(list source_columns,
     return columns_from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def scatter(list sources, Column scatter_map, list target_columns,
             bool bounds_check=True):
     """
@@ -272,6 +277,7 @@ def scatter(list sources, Column scatter_map, list target_columns,
         )
 
 
+@acquire_spill_lock()
 def column_empty_like(Column input_column):
 
     cdef column_view input_column_view = input_column.view()
@@ -283,6 +289,7 @@ def column_empty_like(Column input_column):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def column_allocate_like(Column input_column, size=None):
 
     cdef size_type c_size = 0
@@ -307,6 +314,7 @@ def column_allocate_like(Column input_column, size=None):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def columns_empty_like(list input_columns):
     cdef table_view input_table_view = table_view_from_columns(input_columns)
     cdef unique_ptr[table] c_result
@@ -317,6 +325,7 @@ def columns_empty_like(list input_columns):
     return columns_from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def column_slice(Column input_column, object indices):
 
     cdef column_view input_column_view = input_column.view()
@@ -346,6 +355,7 @@ def column_slice(Column input_column, object indices):
     return result
 
 
+@acquire_spill_lock()
 def columns_slice(list input_columns, list indices):
     """
     Given a list of input columns, return columns sliced by ``indices``.
@@ -372,6 +382,7 @@ def columns_slice(list input_columns, list indices):
     ]
 
 
+@acquire_spill_lock()
 def column_split(Column input_column, object splits):
 
     cdef column_view input_column_view = input_column.view()
@@ -403,6 +414,7 @@ def column_split(Column input_column, object splits):
     return result
 
 
+@acquire_spill_lock()
 def columns_split(list input_columns, object splits):
 
     cdef table_view input_table_view = table_view_from_columns(input_columns)
@@ -509,6 +521,7 @@ def _copy_if_else_scalar_scalar(DeviceScalar lhs,
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def copy_if_else(object lhs, object rhs, Column boolean_mask):
 
     if isinstance(lhs, Column):
@@ -576,6 +589,7 @@ def _boolean_mask_scatter_scalar(list input_scalars, list target_columns,
     return columns_from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def boolean_mask_scatter(list input_, list target_columns,
                          Column boolean_mask):
     """Copy the target columns, replacing masked rows with input data.
@@ -608,6 +622,7 @@ def boolean_mask_scatter(list input_, list target_columns,
         )
 
 
+@acquire_spill_lock()
 def shift(Column input, int offset, object fill_value=None):
 
     cdef DeviceScalar fill
@@ -644,6 +659,7 @@ def shift(Column input, int offset, object fill_value=None):
     return Column.from_unique_ptr(move(c_output))
 
 
+@acquire_spill_lock()
 def get_element(Column input_column, size_type index):
     cdef column_view col_view = input_column.view()
 
@@ -658,6 +674,7 @@ def get_element(Column input_column, size_type index):
     )
 
 
+@acquire_spill_lock()
 def segmented_gather(Column source_column, Column gather_map):
     cdef shared_ptr[lists_column_view] source_LCV = (
         make_shared[lists_column_view](source_column.view())
@@ -725,7 +742,8 @@ cdef class _CPackedColumns:
         gpu_data = as_buffer(
             data=self.gpu_data_ptr,
             size=self.gpu_data_size,
-            owner=self
+            owner=self,
+            exposed=True
         )
         data_header, data_frames = gpu_data.serialize()
         header["data"] = data_header
diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx
index cb0a245b915..81949dbaa20 100644
--- a/python/cudf/cudf/_lib/datetime.pyx
+++ b/python/cudf/cudf/_lib/datetime.pyx
@@ -1,5 +1,7 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
+from cudf.core.buffer import acquire_spill_lock
+
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
@@ -12,6 +14,7 @@ from cudf._lib.cpp.types cimport size_type
 from cudf._lib.scalar cimport DeviceScalar
 
 
+@acquire_spill_lock()
 def add_months(Column col, Column months):
     # months must be int16 dtype
     cdef unique_ptr[column] c_result
@@ -29,6 +32,7 @@ def add_months(Column col, Column months):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def extract_datetime_component(Column col, object field):
 
     cdef unique_ptr[column] c_result
@@ -99,6 +103,7 @@ cdef libcudf_datetime.rounding_frequency _get_rounding_frequency(object freq):
     return freq_val
 
 
+@acquire_spill_lock()
 def ceil_datetime(Column col, object freq):
     cdef unique_ptr[column] c_result
     cdef column_view col_view = col.view()
@@ -112,6 +117,7 @@ def ceil_datetime(Column col, object freq):
     return result
 
 
+@acquire_spill_lock()
 def floor_datetime(Column col, object freq):
     cdef unique_ptr[column] c_result
     cdef column_view col_view = col.view()
@@ -125,6 +131,7 @@ def floor_datetime(Column col, object freq):
     return result
 
 
+@acquire_spill_lock()
 def round_datetime(Column col, object freq):
     cdef unique_ptr[column] c_result
     cdef column_view col_view = col.view()
@@ -138,6 +145,7 @@ def round_datetime(Column col, object freq):
     return result
 
 
+@acquire_spill_lock()
 def is_leap_year(Column col):
     """Returns a boolean indicator whether the year of the date is a leap year
     """
@@ -150,6 +158,7 @@ def is_leap_year(Column col):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def date_range(DeviceScalar start, size_type n, offset):
     cdef unique_ptr[column] c_result
     cdef size_type months = (
@@ -166,6 +175,7 @@ def date_range(DeviceScalar start, size_type n, offset):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def extract_quarter(Column col):
     """
     Returns a column which contains the corresponding quarter of the year
@@ -180,6 +190,7 @@ def extract_quarter(Column col):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def days_in_month(Column col):
     """Extracts the number of days in the month of the date
     """
@@ -192,6 +203,7 @@ def days_in_month(Column col):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def last_day_of_month(Column col):
     cdef unique_ptr[column] c_result
     cdef column_view col_view = col.view()
diff --git a/python/cudf/cudf/_lib/filling.pyx b/python/cudf/cudf/_lib/filling.pyx
index 891da82821c..63549f08cbd 100644
--- a/python/cudf/cudf/_lib/filling.pyx
+++ b/python/cudf/cudf/_lib/filling.pyx
@@ -1,5 +1,7 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
+from cudf.core.buffer import acquire_spill_lock
+
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
@@ -15,6 +17,7 @@ from cudf._lib.scalar cimport DeviceScalar
 from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
 
 
+@acquire_spill_lock()
 def fill_in_place(Column destination, int begin, int end, DeviceScalar value):
     cdef mutable_column_view c_destination = destination.mutable_view()
     cdef size_type c_begin = <size_type> begin
@@ -29,6 +32,7 @@ def fill_in_place(Column destination, int begin, int end, DeviceScalar value):
     )
 
 
+@acquire_spill_lock()
 def fill(Column destination, int begin, int end, DeviceScalar value):
     cdef column_view c_destination = destination.view()
     cdef size_type c_begin = <size_type> begin
@@ -47,6 +51,7 @@ def fill(Column destination, int begin, int end, DeviceScalar value):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def repeat(list inp, object count):
     if isinstance(count, Column):
         return _repeat_via_column(inp, count)
@@ -81,6 +86,7 @@ def _repeat_via_size_type(list inp, size_type count):
     return columns_from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def sequence(int size, DeviceScalar init, DeviceScalar step):
     cdef size_type c_size = size
     cdef const scalar* c_init = init.get_raw_ptr()
diff --git a/python/cudf/cudf/_lib/groupby.pyx b/python/cudf/cudf/_lib/groupby.pyx
index e6fbefaeee9..a8b7fef6a57 100644
--- a/python/cudf/cudf/_lib/groupby.pyx
+++ b/python/cudf/cudf/_lib/groupby.pyx
@@ -10,6 +10,7 @@ from cudf.api.types import (
     is_string_dtype,
     is_struct_dtype,
 )
+from cudf.core.buffer import acquire_spill_lock
 
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
@@ -86,13 +87,17 @@ cdef class GroupBy:
 
     def __cinit__(self, list keys, bool dropna=True, *args, **kwargs):
         cdef libcudf_types.null_policy c_null_handling
+        cdef table_view keys_view
 
         if dropna:
             c_null_handling = libcudf_types.null_policy.EXCLUDE
         else:
             c_null_handling = libcudf_types.null_policy.INCLUDE
 
-        cdef table_view keys_view = table_view_from_columns(keys)
+        with acquire_spill_lock() as spill_lock:
+            keys_view = table_view_from_columns(keys)
+            # We spill lock the columns while this GroupBy instance is alive.
+            self._spill_lock = spill_lock
 
         with nogil:
             self.c_obj.reset(
diff --git a/python/cudf/cudf/_lib/hash.pyx b/python/cudf/cudf/_lib/hash.pyx
index 03033cd1a7e..1264a9b2126 100644
--- a/python/cudf/cudf/_lib/hash.pyx
+++ b/python/cudf/cudf/_lib/hash.pyx
@@ -1,5 +1,7 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
+from cudf.core.buffer import acquire_spill_lock
+
 from libcpp.memory cimport unique_ptr
 from libcpp.pair cimport pair
 from libcpp.utility cimport move
@@ -15,6 +17,7 @@ from cudf._lib.cpp.table.table_view cimport table_view
 from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
 
 
+@acquire_spill_lock()
 def hash_partition(list source_columns, object columns_to_hash,
                    int num_partitions):
     cdef vector[libcudf_types.size_type] c_columns_to_hash = columns_to_hash
@@ -37,6 +40,7 @@ def hash_partition(list source_columns, object columns_to_hash,
     )
 
 
+@acquire_spill_lock()
 def hash(list source_columns, str method, int seed=0):
     cdef table_view c_source_view = table_view_from_columns(source_columns)
     cdef unique_ptr[column] c_result
diff --git a/python/cudf/cudf/_lib/join.pyx b/python/cudf/cudf/_lib/join.pyx
index ff5f6e1afcc..da03e8dcdd1 100644
--- a/python/cudf/cudf/_lib/join.pyx
+++ b/python/cudf/cudf/_lib/join.pyx
@@ -1,5 +1,7 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
+from cudf.core.buffer import acquire_spill_lock
+
 from libcpp.memory cimport make_unique, unique_ptr
 from libcpp.pair cimport pair
 from libcpp.utility cimport move
@@ -14,7 +16,9 @@ from cudf._lib.utils cimport table_view_from_columns
 # The functions below return the *gathermaps* that represent
 # the join result when joining on the keys `lhs` and `rhs`.
 
-cpdef join(list lhs, list rhs, how=None):
+
+@acquire_spill_lock()
+def join(list lhs, list rhs, how=None):
     cdef pair[cpp_join.gather_map_type, cpp_join.gather_map_type] c_result
     cdef table_view c_lhs = table_view_from_columns(lhs)
     cdef table_view c_rhs = table_view_from_columns(rhs)
@@ -36,7 +40,8 @@ cpdef join(list lhs, list rhs, how=None):
     return left_rows, right_rows
 
 
-cpdef semi_join(list lhs, list rhs, how=None):
+@acquire_spill_lock()
+def semi_join(list lhs, list rhs, how=None):
     # left-semi and left-anti joins
     cdef cpp_join.gather_map_type c_result
     cdef table_view c_lhs = table_view_from_columns(lhs)
diff --git a/python/cudf/cudf/_lib/labeling.pyx b/python/cudf/cudf/_lib/labeling.pyx
index ed5033c08a5..2c2538ab0af 100644
--- a/python/cudf/cudf/_lib/labeling.pyx
+++ b/python/cudf/cudf/_lib/labeling.pyx
@@ -1,5 +1,7 @@
 # Copyright (c) 2021-2022, NVIDIA CORPORATION.
 
+from cudf.core.buffer import acquire_spill_lock
+
 from libcpp cimport bool as cbool
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
@@ -13,6 +15,7 @@ from cudf._lib.cpp.labeling cimport inclusive, label_bins as cpp_label_bins
 # Note that the parameter input shadows a Python built-in in the local scope,
 # but I'm not too concerned about that since there's no use-case for actual
 # input in this context.
+@acquire_spill_lock()
 def label_bins(Column input, Column left_edges, cbool left_inclusive,
                Column right_edges, cbool right_inclusive):
     cdef inclusive c_left_inclusive = \
diff --git a/python/cudf/cudf/_lib/lists.pyx b/python/cudf/cudf/_lib/lists.pyx
index 8a7b4be3be9..47e9dccc8e6 100644
--- a/python/cudf/cudf/_lib/lists.pyx
+++ b/python/cudf/cudf/_lib/lists.pyx
@@ -1,5 +1,7 @@
 # Copyright (c) 2021-2022, NVIDIA CORPORATION.
 
+from cudf.core.buffer import acquire_spill_lock
+
 from libcpp cimport bool
 from libcpp.memory cimport make_shared, shared_ptr, unique_ptr
 from libcpp.utility cimport move
@@ -35,6 +37,7 @@ from cudf._lib.scalar cimport DeviceScalar
 from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
 
 
+@acquire_spill_lock()
 def count_elements(Column col):
 
     # shared_ptr required because lists_column_view has no default
@@ -51,6 +54,7 @@ def count_elements(Column col):
     return result
 
 
+@acquire_spill_lock()
 def explode_outer(
     list source_columns, int explode_column_idx
 ):
@@ -65,6 +69,7 @@ def explode_outer(
     return columns_from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def distinct(Column col, bool nulls_equal, bool nans_all_equal):
     """
     nulls_equal == True indicates that libcudf should treat any two nulls as
@@ -93,6 +98,7 @@ def distinct(Column col, bool nulls_equal, bool nans_all_equal):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def sort_lists(Column col, bool ascending, str na_position):
     cdef shared_ptr[lists_column_view] list_view = (
         make_shared[lists_column_view](col.view())
@@ -114,6 +120,7 @@ def sort_lists(Column col, bool ascending, str na_position):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def extract_element_scalar(Column col, size_type index):
     # shared_ptr required because lists_column_view has no default
     # ctor
@@ -130,6 +137,7 @@ def extract_element_scalar(Column col, size_type index):
     return result
 
 
+@acquire_spill_lock()
 def extract_element_column(Column col, Column index):
     cdef shared_ptr[lists_column_view] list_view = (
         make_shared[lists_column_view](col.view())
@@ -146,6 +154,7 @@ def extract_element_column(Column col, Column index):
     return result
 
 
+@acquire_spill_lock()
 def contains_scalar(Column col, object py_search_key):
 
     cdef DeviceScalar search_key = py_search_key.device_value
@@ -166,6 +175,7 @@ def contains_scalar(Column col, object py_search_key):
     return result
 
 
+@acquire_spill_lock()
 def index_of_scalar(Column col, object py_search_key):
 
     cdef DeviceScalar search_key = py_search_key.device_value
@@ -185,6 +195,7 @@ def index_of_scalar(Column col, object py_search_key):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def index_of_column(Column col, Column search_keys):
 
     cdef column_view keys_view = search_keys.view()
@@ -203,6 +214,7 @@ def index_of_column(Column col, Column search_keys):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def concatenate_rows(list source_columns):
     cdef unique_ptr[column] c_result
 
@@ -216,6 +228,7 @@ def concatenate_rows(list source_columns):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def concatenate_list_elements(Column input_column, dropna=False):
     cdef concatenate_null_policy policy = (
         concatenate_null_policy.IGNORE if dropna
diff --git a/python/cudf/cudf/_lib/null_mask.pyx b/python/cudf/cudf/_lib/null_mask.pyx
index 61988019c70..c41ae98b9bd 100644
--- a/python/cudf/cudf/_lib/null_mask.pyx
+++ b/python/cudf/cudf/_lib/null_mask.pyx
@@ -2,12 +2,14 @@
 
 from enum import Enum
 
+from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer
+
+from cudf.core.buffer import acquire_spill_lock, as_buffer
+
 from libcpp.memory cimport make_unique, unique_ptr
 from libcpp.pair cimport pair
 from libcpp.utility cimport move
 
-from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer
-
 from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.null_mask cimport (
@@ -22,8 +24,6 @@ from cudf._lib.cpp.table.table_view cimport table_view
 from cudf._lib.cpp.types cimport mask_state, size_type
 from cudf._lib.utils cimport table_view_from_columns
 
-from cudf.core.buffer import as_buffer
-
 
 class MaskState(Enum):
     """
@@ -35,6 +35,7 @@ class MaskState(Enum):
     ALL_NULL = <underlying_type_t_mask_state> mask_state.ALL_NULL
 
 
+@acquire_spill_lock()
 def copy_bitmask(Column col):
     """
     Copies column's validity mask buffer into a new buffer, shifting by the
@@ -102,6 +103,7 @@ def create_null_mask(size_type size, state=MaskState.UNINITIALIZED):
     return buf
 
 
+@acquire_spill_lock()
 def bitmask_and(columns: list):
     cdef table_view c_view = table_view_from_columns(columns)
     cdef pair[device_buffer, size_type] c_result
@@ -114,6 +116,7 @@ def bitmask_and(columns: list):
     return buf, c_result.second
 
 
+@acquire_spill_lock()
 def bitmask_or(columns: list):
     cdef table_view c_view = table_view_from_columns(columns)
     cdef pair[device_buffer, size_type] c_result
diff --git a/python/cudf/cudf/_lib/nvtext/edit_distance.pyx b/python/cudf/cudf/_lib/nvtext/edit_distance.pyx
index c8dc6edd6e2..984c8e84d7c 100644
--- a/python/cudf/cudf/_lib/nvtext/edit_distance.pyx
+++ b/python/cudf/cudf/_lib/nvtext/edit_distance.pyx
@@ -1,5 +1,7 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
+from cudf.core.buffer import acquire_spill_lock
+
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
@@ -12,6 +14,7 @@ from cudf._lib.cpp.nvtext.edit_distance cimport (
 )
 
 
+@acquire_spill_lock()
 def edit_distance(Column strings, Column targets):
     cdef column_view c_strings = strings.view()
     cdef column_view c_targets = targets.view()
@@ -23,6 +26,7 @@ def edit_distance(Column strings, Column targets):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def edit_distance_matrix(Column strings):
     cdef column_view c_strings = strings.view()
     cdef unique_ptr[column] c_result
diff --git a/python/cudf/cudf/_lib/nvtext/generate_ngrams.pyx b/python/cudf/cudf/_lib/nvtext/generate_ngrams.pyx
index 5fcec570dcb..7be3b0f7c03 100644
--- a/python/cudf/cudf/_lib/nvtext/generate_ngrams.pyx
+++ b/python/cudf/cudf/_lib/nvtext/generate_ngrams.pyx
@@ -1,4 +1,6 @@
-# Copyright (c) 2018-2020, NVIDIA CORPORATION.
+# Copyright (c) 2018-2022, NVIDIA CORPORATION.
+
+from cudf.core.buffer import acquire_spill_lock
 
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
@@ -15,6 +17,7 @@ from cudf._lib.cpp.types cimport size_type
 from cudf._lib.scalar cimport DeviceScalar
 
 
+@acquire_spill_lock()
 def generate_ngrams(Column strings, int ngrams, object py_separator):
 
     cdef DeviceScalar separator = py_separator.device_value
@@ -37,6 +40,7 @@ def generate_ngrams(Column strings, int ngrams, object py_separator):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def generate_character_ngrams(Column strings, int ngrams):
     cdef column_view c_strings = strings.view()
     cdef size_type c_ngrams = ngrams
diff --git a/python/cudf/cudf/_lib/nvtext/ngrams_tokenize.pyx b/python/cudf/cudf/_lib/nvtext/ngrams_tokenize.pyx
index 1e9e0e39ff1..3e7911c8ae8 100644
--- a/python/cudf/cudf/_lib/nvtext/ngrams_tokenize.pyx
+++ b/python/cudf/cudf/_lib/nvtext/ngrams_tokenize.pyx
@@ -1,4 +1,6 @@
-# Copyright (c) 2018-2020, NVIDIA CORPORATION.
+# Copyright (c) 2018-2022, NVIDIA CORPORATION.
+
+from cudf.core.buffer import acquire_spill_lock
 
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
@@ -14,6 +16,7 @@ from cudf._lib.cpp.types cimport size_type
 from cudf._lib.scalar cimport DeviceScalar
 
 
+@acquire_spill_lock()
 def ngrams_tokenize(
     Column strings,
     int ngrams,
diff --git a/python/cudf/cudf/_lib/nvtext/normalize.pyx b/python/cudf/cudf/_lib/nvtext/normalize.pyx
index e475f0cd996..80c6ef792ab 100644
--- a/python/cudf/cudf/_lib/nvtext/normalize.pyx
+++ b/python/cudf/cudf/_lib/nvtext/normalize.pyx
@@ -1,4 +1,6 @@
-# Copyright (c) 2018-2020, NVIDIA CORPORATION.
+# Copyright (c) 2018-2022, NVIDIA CORPORATION.
+
+from cudf.core.buffer import acquire_spill_lock
 
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
@@ -13,6 +15,7 @@ from cudf._lib.cpp.nvtext.normalize cimport (
 )
 
 
+@acquire_spill_lock()
 def normalize_spaces(Column strings):
     cdef column_view c_strings = strings.view()
     cdef unique_ptr[column] c_result
@@ -23,6 +26,7 @@ def normalize_spaces(Column strings):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def normalize_characters(Column strings, bool do_lower=True):
     cdef column_view c_strings = strings.view()
     cdef unique_ptr[column] c_result
diff --git a/python/cudf/cudf/_lib/nvtext/replace.pyx b/python/cudf/cudf/_lib/nvtext/replace.pyx
index b4f37ac3ec7..289e5611010 100644
--- a/python/cudf/cudf/_lib/nvtext/replace.pyx
+++ b/python/cudf/cudf/_lib/nvtext/replace.pyx
@@ -1,4 +1,6 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+
+from cudf.core.buffer import acquire_spill_lock
 
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
@@ -15,6 +17,7 @@ from cudf._lib.cpp.types cimport size_type
 from cudf._lib.scalar cimport DeviceScalar
 
 
+@acquire_spill_lock()
 def replace_tokens(Column strings,
                    Column targets,
                    Column replacements,
@@ -49,6 +52,7 @@ def replace_tokens(Column strings,
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def filter_tokens(Column strings,
                   size_type min_token_length,
                   object py_replacement,
diff --git a/python/cudf/cudf/_lib/nvtext/stemmer.pyx b/python/cudf/cudf/_lib/nvtext/stemmer.pyx
index 89d4b07b7ad..7a76052ffe4 100644
--- a/python/cudf/cudf/_lib/nvtext/stemmer.pyx
+++ b/python/cudf/cudf/_lib/nvtext/stemmer.pyx
@@ -1,4 +1,6 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+
+from cudf.core.buffer import acquire_spill_lock
 
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
@@ -22,6 +24,7 @@ class LetterType(IntEnum):
     VOWEL = <underlying_type_t_letter_type> letter_type.VOWEL
 
 
+@acquire_spill_lock()
 def porter_stemmer_measure(Column strings):
     cdef column_view c_strings = strings.view()
     cdef unique_ptr[column] c_result
@@ -32,6 +35,7 @@ def porter_stemmer_measure(Column strings):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def is_letter(Column strings,
               object ltype,
               size_type index):
@@ -47,6 +51,7 @@ def is_letter(Column strings,
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def is_letter_multi(Column strings,
                     object ltype,
                     Column indices):
diff --git a/python/cudf/cudf/_lib/nvtext/subword_tokenize.pyx b/python/cudf/cudf/_lib/nvtext/subword_tokenize.pyx
index dd8bbd6d7b6..dbd23d91cc5 100644
--- a/python/cudf/cudf/_lib/nvtext/subword_tokenize.pyx
+++ b/python/cudf/cudf/_lib/nvtext/subword_tokenize.pyx
@@ -1,6 +1,9 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libc.stdint cimport uint32_t
+
+from cudf.core.buffer import acquire_spill_lock
+
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
@@ -26,6 +29,7 @@ cdef class Hashed_Vocabulary:
             self.c_obj = move(cpp_load_vocabulary_file(c_hash_file))
 
 
+@acquire_spill_lock()
 def subword_tokenize_inmem_hash(
     Column strings,
     Hashed_Vocabulary hashed_vocabulary,
diff --git a/python/cudf/cudf/_lib/nvtext/tokenize.pyx b/python/cudf/cudf/_lib/nvtext/tokenize.pyx
index 00f63b9cf7c..2bb4fa8e108 100644
--- a/python/cudf/cudf/_lib/nvtext/tokenize.pyx
+++ b/python/cudf/cudf/_lib/nvtext/tokenize.pyx
@@ -1,5 +1,7 @@
 # Copyright (c) 2018-2022, NVIDIA CORPORATION.
 
+from cudf.core.buffer import acquire_spill_lock
+
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
@@ -16,6 +18,7 @@ from cudf._lib.cpp.scalar.scalar cimport string_scalar
 from cudf._lib.scalar cimport DeviceScalar
 
 
+@acquire_spill_lock()
 def _tokenize_scalar(Column strings, object py_delimiter):
 
     cdef DeviceScalar delimiter = py_delimiter.device_value
@@ -36,6 +39,7 @@ def _tokenize_scalar(Column strings, object py_delimiter):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def _tokenize_column(Column strings, Column delimiters):
     cdef column_view c_strings = strings.view()
     cdef column_view c_delimiters = delimiters.view()
@@ -52,6 +56,7 @@ def _tokenize_column(Column strings, Column delimiters):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def _count_tokens_scalar(Column strings, object py_delimiter):
 
     cdef DeviceScalar delimiter = py_delimiter.device_value
@@ -72,6 +77,7 @@ def _count_tokens_scalar(Column strings, object py_delimiter):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def _count_tokens_column(Column strings, Column delimiters):
     cdef column_view c_strings = strings.view()
     cdef column_view c_delimiters = delimiters.view()
@@ -88,6 +94,7 @@ def _count_tokens_column(Column strings, Column delimiters):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def character_tokenize(Column strings):
     cdef column_view c_strings = strings.view()
     cdef unique_ptr[column] c_result
@@ -99,6 +106,7 @@ def character_tokenize(Column strings):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def detokenize(Column strings, Column indices, object py_separator):
 
     cdef DeviceScalar separator = py_separator.device_value
diff --git a/python/cudf/cudf/_lib/partitioning.pyx b/python/cudf/cudf/_lib/partitioning.pyx
index 233551c5134..083407954b3 100644
--- a/python/cudf/cudf/_lib/partitioning.pyx
+++ b/python/cudf/cudf/_lib/partitioning.pyx
@@ -1,5 +1,7 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
+from cudf.core.buffer import acquire_spill_lock
+
 from libcpp.memory cimport unique_ptr
 from libcpp.pair cimport pair
 from libcpp.utility cimport move
@@ -16,6 +18,7 @@ from cudf._lib.stream_compaction import distinct_count as cpp_distinct_count
 cimport cudf._lib.cpp.types as libcudf_types
 
 
+@acquire_spill_lock()
 def partition(list source_columns, Column partition_map,
               object num_partitions):
 
diff --git a/python/cudf/cudf/_lib/quantiles.pyx b/python/cudf/cudf/_lib/quantiles.pyx
index 62706367c4f..d3a02fa7cbf 100644
--- a/python/cudf/cudf/_lib/quantiles.pyx
+++ b/python/cudf/cudf/_lib/quantiles.pyx
@@ -1,5 +1,7 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
+from cudf.core.buffer import acquire_spill_lock
+
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
@@ -27,6 +29,7 @@ from cudf._lib.cpp.types cimport interpolation, null_order, order, sorted
 from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
 
 
+@acquire_spill_lock()
 def quantile(
     Column input,
     object q,
diff --git a/python/cudf/cudf/_lib/reduce.pyx b/python/cudf/cudf/_lib/reduce.pyx
index e46d724ed9d..f11bacd5d1e 100644
--- a/python/cudf/cudf/_lib/reduce.pyx
+++ b/python/cudf/cudf/_lib/reduce.pyx
@@ -3,6 +3,7 @@
 from cython.operator import dereference
 
 import cudf
+from cudf.core.buffer import acquire_spill_lock
 
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move, pair
@@ -23,6 +24,7 @@ from cudf._lib.scalar cimport DeviceScalar
 from cudf._lib.types cimport dtype_to_data_type, is_decimal_type_id
 
 
+@acquire_spill_lock()
 def reduce(reduction_op, Column incol, dtype=None, **kwargs):
     """
     Top level Cython reduce function wrapping libcudf reductions.
@@ -79,6 +81,7 @@ def reduce(reduction_op, Column incol, dtype=None, **kwargs):
     return py_result.value
 
 
+@acquire_spill_lock()
 def scan(scan_op, Column incol, inclusive, **kwargs):
     """
     Top level Cython scan function wrapping libcudf scans.
@@ -110,6 +113,7 @@ def scan(scan_op, Column incol, inclusive, **kwargs):
     return py_result
 
 
+@acquire_spill_lock()
 def minmax(Column incol):
     """
     Top level Cython minmax function wrapping libcudf minmax.
diff --git a/python/cudf/cudf/_lib/replace.pyx b/python/cudf/cudf/_lib/replace.pyx
index e4311b356ec..c763a86d6e5 100644
--- a/python/cudf/cudf/_lib/replace.pyx
+++ b/python/cudf/cudf/_lib/replace.pyx
@@ -1,9 +1,10 @@
-# Copyright (c) 2020-2021, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
 from cudf.api.types import is_scalar
+from cudf.core.buffer import acquire_spill_lock
 
 from cudf._lib.column cimport Column
 
@@ -22,6 +23,7 @@ from cudf._lib.cpp.scalar.scalar cimport scalar
 from cudf._lib.scalar cimport DeviceScalar
 
 
+@acquire_spill_lock()
 def replace(Column input_col, Column values_to_replace,
             Column replacement_values):
     """
@@ -48,6 +50,7 @@ def replace(Column input_col, Column values_to_replace,
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def replace_nulls_column(Column input_col, Column replacement_values):
     """
     Replaces null values in input_col with corresponding values from
@@ -70,6 +73,7 @@ def replace_nulls_column(Column input_col, Column replacement_values):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def replace_nulls_scalar(Column input_col, DeviceScalar replacement_value):
     """
     Replaces null values in input_col with replacement_value
@@ -92,6 +96,7 @@ def replace_nulls_scalar(Column input_col, DeviceScalar replacement_value):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def replace_nulls_fill(Column input_col, object method):
     """
     Replaces null values in input_col with replacement_value
@@ -145,6 +150,7 @@ def replace_nulls(
         return replace_nulls_column(input_col, replacement)
 
 
+@acquire_spill_lock()
 def clamp(Column input_col, DeviceScalar lo, DeviceScalar lo_replace,
           DeviceScalar hi, DeviceScalar hi_replace):
     """
@@ -175,6 +181,7 @@ def clamp(Column input_col, DeviceScalar lo, DeviceScalar lo_replace,
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def clamp(Column input_col, DeviceScalar lo, DeviceScalar hi):
     """
     Clip the input_col such that values < lo will be replaced by lo
@@ -198,6 +205,7 @@ def clamp(Column input_col, DeviceScalar lo, DeviceScalar hi):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def clip(Column input_col, object lo, object hi):
     """
     Clip the input_col such that values < lo will be replaced by lo
@@ -210,6 +218,7 @@ def clip(Column input_col, object lo, object hi):
     return clamp(input_col, lo_scalar, hi_scalar)
 
 
+@acquire_spill_lock()
 def normalize_nans_and_zeros_inplace(Column input_col):
     """
     Inplace normalizing
@@ -220,6 +229,7 @@ def normalize_nans_and_zeros_inplace(Column input_col):
         cpp_normalize_nans_and_zeros(input_col_view)
 
 
+@acquire_spill_lock()
 def normalize_nans_and_zeros_column(Column input_col):
     """
     Returns a new  normalized Column
diff --git a/python/cudf/cudf/_lib/reshape.pyx b/python/cudf/cudf/_lib/reshape.pyx
index 84bad039199..c237b7b1389 100644
--- a/python/cudf/cudf/_lib/reshape.pyx
+++ b/python/cudf/cudf/_lib/reshape.pyx
@@ -1,5 +1,7 @@
 # Copyright (c) 2019-2022, NVIDIA CORPORATION.
 
+from cudf.core.buffer import acquire_spill_lock
+
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
@@ -15,6 +17,7 @@ from cudf._lib.cpp.types cimport size_type
 from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
 
 
+@acquire_spill_lock()
 def interleave_columns(list source_columns):
     cdef table_view c_view = table_view_from_columns(source_columns)
     cdef unique_ptr[column] c_result
@@ -25,6 +28,7 @@ def interleave_columns(list source_columns):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def tile(list source_columns, size_type count):
     cdef size_type c_count = count
     cdef table_view c_view = table_view_from_columns(source_columns)
diff --git a/python/cudf/cudf/_lib/rolling.pyx b/python/cudf/cudf/_lib/rolling.pyx
index 7b0da6957a0..8c4751e3084 100644
--- a/python/cudf/cudf/_lib/rolling.pyx
+++ b/python/cudf/cudf/_lib/rolling.pyx
@@ -1,5 +1,7 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
+from cudf.core.buffer import acquire_spill_lock
+
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
@@ -11,6 +13,7 @@ from cudf._lib.cpp.rolling cimport rolling_window as cpp_rolling_window
 from cudf._lib.cpp.types cimport size_type
 
 
+@acquire_spill_lock()
 def rolling(Column source_column,
             Column pre_column_window,
             Column fwd_column_window,
diff --git a/python/cudf/cudf/_lib/round.pyx b/python/cudf/cudf/_lib/round.pyx
index c5c565561a9..7eddb1b8cbd 100644
--- a/python/cudf/cudf/_lib/round.pyx
+++ b/python/cudf/cudf/_lib/round.pyx
@@ -1,4 +1,6 @@
-# Copyright (c) 2021, NVIDIA CORPORATION.
+# Copyright (c) 2021-2022, NVIDIA CORPORATION.
+
+from cudf.core.buffer import acquire_spill_lock
 
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
@@ -12,6 +14,7 @@ from cudf._lib.cpp.round cimport (
 )
 
 
+@acquire_spill_lock()
 def round(Column input_col, int decimal_places=0, how="half_even"):
     """
     Round column values to the given number of decimal places
diff --git a/python/cudf/cudf/_lib/search.pyx b/python/cudf/cudf/_lib/search.pyx
index b8abe3d0dab..fef3a08c6d7 100644
--- a/python/cudf/cudf/_lib/search.pyx
+++ b/python/cudf/cudf/_lib/search.pyx
@@ -1,5 +1,7 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
+from cudf.core.buffer import acquire_spill_lock
+
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 from libcpp.vector cimport vector
@@ -13,6 +15,7 @@ from cudf._lib.cpp.table.table_view cimport table_view
 from cudf._lib.utils cimport table_view_from_columns
 
 
+@acquire_spill_lock()
 def search_sorted(
     list source, list values, side, ascending=True, na_position="last"
 ):
@@ -73,6 +76,7 @@ def search_sorted(
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def contains(Column haystack, Column needles):
     """Check whether column contains multiple values
 
diff --git a/python/cudf/cudf/_lib/sort.pyx b/python/cudf/cudf/_lib/sort.pyx
index eb3aed80700..3b96cc618dd 100644
--- a/python/cudf/cudf/_lib/sort.pyx
+++ b/python/cudf/cudf/_lib/sort.pyx
@@ -1,5 +1,7 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
+from cudf.core.buffer import acquire_spill_lock
+
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
@@ -23,6 +25,7 @@ from cudf._lib.cpp.types cimport null_order, null_policy, order
 from cudf._lib.utils cimport table_view_from_columns
 
 
+@acquire_spill_lock()
 def is_sorted(
     list source_columns, object ascending=None, object null_position=None
 ):
@@ -98,6 +101,7 @@ def is_sorted(
     return c_result
 
 
+@acquire_spill_lock()
 def order_by(list columns_from_table, object ascending, str na_position):
     """
     Get index to sort the table in ascending/descending order.
@@ -139,6 +143,7 @@ def order_by(list columns_from_table, object ascending, str na_position):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def digitize(list source_columns, list bins, bool right=False):
     """
     Return the indices of the bins to which each value in source_table belongs.
@@ -189,6 +194,7 @@ def digitize(list source_columns, list bins, bool right=False):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def rank_columns(list source_columns, object method, str na_option,
                  bool ascending, bool pct
                  ):
diff --git a/python/cudf/cudf/_lib/stream_compaction.pyx b/python/cudf/cudf/_lib/stream_compaction.pyx
index 38cead87e76..143999e52ef 100644
--- a/python/cudf/cudf/_lib/stream_compaction.pyx
+++ b/python/cudf/cudf/_lib/stream_compaction.pyx
@@ -1,5 +1,7 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
+from cudf.core.buffer import acquire_spill_lock
+
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
@@ -28,6 +30,7 @@ from cudf._lib.cpp.types cimport (
 from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
 
 
+@acquire_spill_lock()
 def drop_nulls(list columns, how="any", keys=None, thresh=None):
     """
     Drops null rows from cols depending on key columns.
@@ -71,6 +74,7 @@ def drop_nulls(list columns, how="any", keys=None, thresh=None):
     return columns_from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def apply_boolean_mask(list columns, Column boolean_mask):
     """
     Drops the rows which correspond to False in boolean_mask.
@@ -100,6 +104,7 @@ def apply_boolean_mask(list columns, Column boolean_mask):
     return columns_from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def drop_duplicates(list columns,
                     object keys=None,
                     object keep='first',
@@ -184,6 +189,7 @@ def drop_duplicates(list columns,
     return columns_from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def distinct_count(Column source_column, ignore_nulls=True, nan_as_null=False):
     """
     Finds number of unique rows in `source_column`
diff --git a/python/cudf/cudf/_lib/strings/attributes.pyx b/python/cudf/cudf/_lib/strings/attributes.pyx
index 8720fad7455..c1b69dda353 100644
--- a/python/cudf/cudf/_lib/strings/attributes.pyx
+++ b/python/cudf/cudf/_lib/strings/attributes.pyx
@@ -1,4 +1,6 @@
-# Copyright (c) 2018-2020, NVIDIA CORPORATION.
+# Copyright (c) 2018-2022, NVIDIA CORPORATION.
+
+from cudf.core.buffer import acquire_spill_lock
 
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
@@ -13,6 +15,7 @@ from cudf._lib.cpp.strings.attributes cimport (
 )
 
 
+@acquire_spill_lock()
 def count_characters(Column source_strings):
     """
     Returns an integer numeric column containing the
@@ -27,6 +30,7 @@ def count_characters(Column source_strings):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def count_bytes(Column source_strings):
     """
     Returns an integer numeric column containing the
@@ -41,6 +45,7 @@ def count_bytes(Column source_strings):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def code_points(Column source_strings):
     """
     Creates a numeric column with code point values (integers)
diff --git a/python/cudf/cudf/_lib/strings/capitalize.pyx b/python/cudf/cudf/_lib/strings/capitalize.pyx
index 0bbdfa462e2..f6a80ac8fbe 100644
--- a/python/cudf/cudf/_lib/strings/capitalize.pyx
+++ b/python/cudf/cudf/_lib/strings/capitalize.pyx
@@ -1,4 +1,6 @@
-# Copyright (c) 2018-2021, NVIDIA CORPORATION.
+# Copyright (c) 2018-2022, NVIDIA CORPORATION.
+
+from cudf.core.buffer import acquire_spill_lock
 
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
@@ -13,6 +15,7 @@ from cudf._lib.cpp.strings.capitalize cimport (
 )
 
 
+@acquire_spill_lock()
 def capitalize(Column source_strings):
     cdef unique_ptr[column] c_result
     cdef column_view source_view = source_strings.view()
@@ -23,6 +26,7 @@ def capitalize(Column source_strings):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def title(Column source_strings):
     cdef unique_ptr[column] c_result
     cdef column_view source_view = source_strings.view()
@@ -33,6 +37,7 @@ def title(Column source_strings):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def is_title(Column source_strings):
     cdef unique_ptr[column] c_result
     cdef column_view source_view = source_strings.view()
diff --git a/python/cudf/cudf/_lib/strings/case.pyx b/python/cudf/cudf/_lib/strings/case.pyx
index 13679f3fb02..09af1178946 100644
--- a/python/cudf/cudf/_lib/strings/case.pyx
+++ b/python/cudf/cudf/_lib/strings/case.pyx
@@ -1,4 +1,6 @@
-# Copyright (c) 2018-2020, NVIDIA CORPORATION.
+# Copyright (c) 2018-2022, NVIDIA CORPORATION.
+
+from cudf.core.buffer import acquire_spill_lock
 
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
@@ -13,6 +15,7 @@ from cudf._lib.cpp.strings.case cimport (
 )
 
 
+@acquire_spill_lock()
 def to_upper(Column source_strings):
     cdef unique_ptr[column] c_result
     cdef column_view source_view = source_strings.view()
@@ -23,6 +26,7 @@ def to_upper(Column source_strings):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def to_lower(Column source_strings):
     cdef unique_ptr[column] c_result
     cdef column_view source_view = source_strings.view()
@@ -33,6 +37,7 @@ def to_lower(Column source_strings):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def swapcase(Column source_strings):
     cdef unique_ptr[column] c_result
     cdef column_view source_view = source_strings.view()
diff --git a/python/cudf/cudf/_lib/strings/char_types.pyx b/python/cudf/cudf/_lib/strings/char_types.pyx
index 3ef9db2345d..eb03d7c2192 100644
--- a/python/cudf/cudf/_lib/strings/char_types.pyx
+++ b/python/cudf/cudf/_lib/strings/char_types.pyx
@@ -1,9 +1,12 @@
-# Copyright (c) 2021, NVIDIA CORPORATION.
+# Copyright (c) 2021-2022, NVIDIA CORPORATION.
+
 
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
+from cudf.core.buffer import acquire_spill_lock
+
 from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.column.column_view cimport column_view
@@ -16,6 +19,7 @@ from cudf._lib.cpp.strings.char_types cimport (
 from cudf._lib.scalar cimport DeviceScalar
 
 
+@acquire_spill_lock()
 def filter_alphanum(Column source_strings, object py_repl, bool keep=True):
     """
     Returns a Column of strings keeping only alphanumeric character types.
@@ -42,6 +46,7 @@ def filter_alphanum(Column source_strings, object py_repl, bool keep=True):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def is_decimal(Column source_strings):
     """
     Returns a Column of boolean values with True for `source_strings`
@@ -61,6 +66,7 @@ def is_decimal(Column source_strings):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def is_alnum(Column source_strings):
     """
     Returns a Column of boolean values with True for `source_strings`
@@ -81,6 +87,7 @@ def is_alnum(Column source_strings):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def is_alpha(Column source_strings):
     """
     Returns a Column of boolean values with True for `source_strings`
@@ -99,6 +106,7 @@ def is_alpha(Column source_strings):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def is_digit(Column source_strings):
     """
     Returns a Column of boolean values with True for `source_strings`
@@ -117,6 +125,7 @@ def is_digit(Column source_strings):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def is_numeric(Column source_strings):
     """
     Returns a Column of boolean values with True for `source_strings`
@@ -136,6 +145,7 @@ def is_numeric(Column source_strings):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def is_upper(Column source_strings):
     """
     Returns a Column of boolean values with True for `source_strings`
@@ -154,6 +164,7 @@ def is_upper(Column source_strings):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def is_lower(Column source_strings):
     """
     Returns a Column of boolean values with True for `source_strings`
@@ -172,6 +183,7 @@ def is_lower(Column source_strings):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def is_space(Column source_strings):
     """
     Returns a Column of boolean values with True for `source_strings`
diff --git a/python/cudf/cudf/_lib/strings/combine.pyx b/python/cudf/cudf/_lib/strings/combine.pyx
index 141732b4c75..f38f4c5f847 100644
--- a/python/cudf/cudf/_lib/strings/combine.pyx
+++ b/python/cudf/cudf/_lib/strings/combine.pyx
@@ -1,5 +1,7 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
+from cudf.core.buffer import acquire_spill_lock
+
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
@@ -19,6 +21,7 @@ from cudf._lib.scalar cimport DeviceScalar
 from cudf._lib.utils cimport table_view_from_columns
 
 
+@acquire_spill_lock()
 def concatenate(list source_strings,
                 object sep,
                 object na_rep):
@@ -49,6 +52,7 @@ def concatenate(list source_strings,
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def join(Column source_strings,
          object sep,
          object na_rep):
@@ -80,6 +84,7 @@ def join(Column source_strings,
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def join_lists_with_scalar(
         Column source_strings,
         object py_separator,
@@ -115,6 +120,7 @@ def join_lists_with_scalar(
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def join_lists_with_column(
         Column source_strings,
         Column separator_strings,
diff --git a/python/cudf/cudf/_lib/strings/contains.pyx b/python/cudf/cudf/_lib/strings/contains.pyx
index 41c4b54d8b1..7ca93b83921 100644
--- a/python/cudf/cudf/_lib/strings/contains.pyx
+++ b/python/cudf/cudf/_lib/strings/contains.pyx
@@ -1,6 +1,9 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libc.stdint cimport uint32_t
+
+from cudf.core.buffer import acquire_spill_lock
+
 from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
 from libcpp.utility cimport move
@@ -19,6 +22,7 @@ from cudf._lib.cpp.strings.regex_flags cimport regex_flags
 from cudf._lib.scalar cimport DeviceScalar
 
 
+@acquire_spill_lock()
 def contains_re(Column source_strings, object reg_ex, uint32_t flags):
     """
     Returns a Column of boolean values with True for `source_strings`
@@ -40,6 +44,7 @@ def contains_re(Column source_strings, object reg_ex, uint32_t flags):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def count_re(Column source_strings, object reg_ex, uint32_t flags):
     """
     Returns a Column with count of occurrences of `reg_ex` in
@@ -61,6 +66,7 @@ def count_re(Column source_strings, object reg_ex, uint32_t flags):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def match_re(Column source_strings, object reg_ex, uint32_t flags):
     """
     Returns a Column with each value True if the string matches `reg_ex`
@@ -82,6 +88,7 @@ def match_re(Column source_strings, object reg_ex, uint32_t flags):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def like(Column source_strings, object py_pattern, object py_escape):
     """
     Returns a Column with each value True if the string matches the
diff --git a/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx b/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx
index fc07cf6462a..177cbffddb0 100644
--- a/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx
+++ b/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx
@@ -5,6 +5,8 @@ import cudf
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
+from cudf.core.buffer import acquire_spill_lock
+
 from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.column.column_view cimport column_view
@@ -16,6 +18,7 @@ from cudf._lib.cpp.strings.convert.convert_fixed_point cimport (
 from cudf._lib.cpp.types cimport DECIMAL32, DECIMAL64, DECIMAL128, data_type
 
 
+@acquire_spill_lock()
 def from_decimal(Column input_col):
     """
     Converts a `Decimal64Column` to a `StringColumn`.
@@ -38,6 +41,7 @@ def from_decimal(Column input_col):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def to_decimal(Column input_col, object out_type):
     """
     Returns a `Decimal64Column` from the provided `StringColumn`
@@ -75,6 +79,7 @@ def to_decimal(Column input_col, object out_type):
     return result
 
 
+@acquire_spill_lock()
 def is_fixed_point(Column input_col, object dtype):
     """
     Returns a Column of boolean values with True for `input_col`
diff --git a/python/cudf/cudf/_lib/strings/convert/convert_floats.pyx b/python/cudf/cudf/_lib/strings/convert/convert_floats.pyx
index f9d028c5eb5..d1617d85593 100644
--- a/python/cudf/cudf/_lib/strings/convert/convert_floats.pyx
+++ b/python/cudf/cudf/_lib/strings/convert/convert_floats.pyx
@@ -3,6 +3,8 @@
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
+from cudf.core.buffer import acquire_spill_lock
+
 from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.column.column_view cimport column_view
@@ -11,6 +13,7 @@ from cudf._lib.cpp.strings.convert.convert_floats cimport (
 )
 
 
+@acquire_spill_lock()
 def is_float(Column source_strings):
     """
     Returns a Column of boolean values with True for `source_strings`
diff --git a/python/cudf/cudf/_lib/strings/convert/convert_integers.pyx b/python/cudf/cudf/_lib/strings/convert/convert_integers.pyx
index 220cbd0f760..dc560c42182 100644
--- a/python/cudf/cudf/_lib/strings/convert/convert_integers.pyx
+++ b/python/cudf/cudf/_lib/strings/convert/convert_integers.pyx
@@ -3,6 +3,8 @@
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
+from cudf.core.buffer import acquire_spill_lock
+
 from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.column.column_view cimport column_view
@@ -11,6 +13,7 @@ from cudf._lib.cpp.strings.convert.convert_integers cimport (
 )
 
 
+@acquire_spill_lock()
 def is_integer(Column source_strings):
     """
     Returns a Column of boolean values with True for `source_strings`
diff --git a/python/cudf/cudf/_lib/strings/convert/convert_lists.pyx b/python/cudf/cudf/_lib/strings/convert/convert_lists.pyx
index 7ffa69cd680..33f6d4a4af7 100644
--- a/python/cudf/cudf/_lib/strings/convert/convert_lists.pyx
+++ b/python/cudf/cudf/_lib/strings/convert/convert_lists.pyx
@@ -1,8 +1,10 @@
-# Copyright (c) 2021, NVIDIA CORPORATION.
+# Copyright (c) 2021-2022, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
+from cudf.core.buffer import acquire_spill_lock
+
 from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.column.column_view cimport column_view
@@ -16,6 +18,7 @@ from cudf._lib.scalar import as_device_scalar
 from cudf._lib.scalar cimport DeviceScalar
 
 
+@acquire_spill_lock()
 def format_list_column(Column source_list, Column separators):
     """
     Format a list column of strings into a strings column.
diff --git a/python/cudf/cudf/_lib/strings/convert/convert_urls.pyx b/python/cudf/cudf/_lib/strings/convert/convert_urls.pyx
index 8d673de12b8..bc8123281f0 100644
--- a/python/cudf/cudf/_lib/strings/convert/convert_urls.pyx
+++ b/python/cudf/cudf/_lib/strings/convert/convert_urls.pyx
@@ -3,6 +3,8 @@
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
+from cudf.core.buffer import acquire_spill_lock
+
 from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.column.column_view cimport column_view
@@ -12,6 +14,7 @@ from cudf._lib.cpp.strings.convert.convert_urls cimport (
 )
 
 
+@acquire_spill_lock()
 def url_decode(Column source_strings):
     """
     Decode each string in column. No format checking is performed.
@@ -37,6 +40,7 @@ def url_decode(Column source_strings):
     )
 
 
+@acquire_spill_lock()
 def url_encode(Column source_strings):
     """
     Encode each string in column. No format checking is performed.
diff --git a/python/cudf/cudf/_lib/strings/extract.pyx b/python/cudf/cudf/_lib/strings/extract.pyx
index 439c1546381..7d16e3e839d 100644
--- a/python/cudf/cudf/_lib/strings/extract.pyx
+++ b/python/cudf/cudf/_lib/strings/extract.pyx
@@ -5,6 +5,8 @@ from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
 from libcpp.utility cimport move
 
+from cudf.core.buffer import acquire_spill_lock
+
 from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.strings.extract cimport extract as cpp_extract
@@ -13,6 +15,7 @@ from cudf._lib.cpp.table.table cimport table
 from cudf._lib.utils cimport data_from_unique_ptr
 
 
+@acquire_spill_lock()
 def extract(Column source_strings, object pattern, uint32_t flags):
     """
     Returns data which contains extracted capture groups provided in
diff --git a/python/cudf/cudf/_lib/strings/find.pyx b/python/cudf/cudf/_lib/strings/find.pyx
index 788c0a2524a..f6dd3b80de9 100644
--- a/python/cudf/cudf/_lib/strings/find.pyx
+++ b/python/cudf/cudf/_lib/strings/find.pyx
@@ -1,8 +1,10 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
+from cudf.core.buffer import acquire_spill_lock
+
 from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.column.column_view cimport column_view
@@ -18,6 +20,7 @@ from cudf._lib.cpp.types cimport size_type
 from cudf._lib.scalar cimport DeviceScalar
 
 
+@acquire_spill_lock()
 def contains(Column source_strings, object py_target):
     """
     Returns a Column of boolean values with True for `source_strings`
@@ -41,6 +44,7 @@ def contains(Column source_strings, object py_target):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def contains_multiple(Column source_strings, Column target_strings):
     """
     Returns a Column of boolean values with True for `source_strings`
@@ -59,6 +63,7 @@ def contains_multiple(Column source_strings, Column target_strings):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def endswith(Column source_strings, object py_target):
     """
     Returns a Column of boolean values with True for `source_strings`
@@ -83,6 +88,7 @@ def endswith(Column source_strings, object py_target):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def endswith_multiple(Column source_strings, Column target_strings):
     """
     Returns a Column of boolean values with True for `source_strings`
@@ -102,6 +108,7 @@ def endswith_multiple(Column source_strings, Column target_strings):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def startswith(Column source_strings, object py_target):
     """
     Returns a Column of boolean values with True for `source_strings`
@@ -126,6 +133,7 @@ def startswith(Column source_strings, object py_target):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def startswith_multiple(Column source_strings, Column target_strings):
     """
     Returns a Column of boolean values with True for `source_strings`
@@ -145,6 +153,7 @@ def startswith_multiple(Column source_strings, Column target_strings):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def find(Column source_strings,
          object py_target,
          size_type start,
@@ -176,6 +185,7 @@ def find(Column source_strings,
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def rfind(Column source_strings,
           object py_target,
           size_type start,
diff --git a/python/cudf/cudf/_lib/strings/find_multiple.pyx b/python/cudf/cudf/_lib/strings/find_multiple.pyx
index 4ac86ce4ef5..c2a97a4fd7c 100644
--- a/python/cudf/cudf/_lib/strings/find_multiple.pyx
+++ b/python/cudf/cudf/_lib/strings/find_multiple.pyx
@@ -1,8 +1,10 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
+from cudf.core.buffer import acquire_spill_lock
+
 from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.column.column_view cimport column_view
@@ -11,6 +13,7 @@ from cudf._lib.cpp.strings.find_multiple cimport (
 )
 
 
+@acquire_spill_lock()
 def find_multiple(Column source_strings, Column target_strings):
     """
     Returns a column with character position values where each
diff --git a/python/cudf/cudf/_lib/strings/findall.pyx b/python/cudf/cudf/_lib/strings/findall.pyx
index be34ce1fb18..4080d346142 100644
--- a/python/cudf/cudf/_lib/strings/findall.pyx
+++ b/python/cudf/cudf/_lib/strings/findall.pyx
@@ -5,6 +5,8 @@ from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
 from libcpp.utility cimport move
 
+from cudf.core.buffer import acquire_spill_lock
+
 from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.column.column_view cimport column_view
@@ -12,6 +14,7 @@ from cudf._lib.cpp.strings.findall cimport findall as cpp_findall
 from cudf._lib.cpp.strings.regex_flags cimport regex_flags
 
 
+@acquire_spill_lock()
 def findall(Column source_strings, object pattern, uint32_t flags):
     """
     Returns data with all non-overlapping matches of `pattern`
diff --git a/python/cudf/cudf/_lib/strings/json.pyx b/python/cudf/cudf/_lib/strings/json.pyx
index 9dbc932d842..861e0daa6e3 100644
--- a/python/cudf/cudf/_lib/strings/json.pyx
+++ b/python/cudf/cudf/_lib/strings/json.pyx
@@ -3,6 +3,8 @@
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
+from cudf.core.buffer import acquire_spill_lock
+
 from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.column.column_view cimport column_view
@@ -14,6 +16,7 @@ from cudf._lib.cpp.strings.json cimport (
 from cudf._lib.scalar cimport DeviceScalar
 
 
+@acquire_spill_lock()
 def get_json_object(
         Column col, object py_json_path, GetJsonObjectOptions options):
     """
diff --git a/python/cudf/cudf/_lib/strings/padding.pyx b/python/cudf/cudf/_lib/strings/padding.pyx
index f53feab7936..340d7eb52d8 100644
--- a/python/cudf/cudf/_lib/strings/padding.pyx
+++ b/python/cudf/cudf/_lib/strings/padding.pyx
@@ -1,16 +1,17 @@
 # Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
+from libcpp.string cimport string
 from libcpp.utility cimport move
 
+from cudf.core.buffer import acquire_spill_lock
+
 from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.types cimport size_type
 
 from enum import IntEnum
 
-from libcpp.string cimport string
-
 from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.strings.padding cimport pad as cpp_pad, zfill as cpp_zfill
 from cudf._lib.cpp.strings.side_type cimport (
@@ -25,6 +26,7 @@ class SideType(IntEnum):
     BOTH = <underlying_type_t_side_type> side_type.BOTH
 
 
+@acquire_spill_lock()
 def pad(Column source_strings,
         size_type width,
         fill_char,
@@ -55,6 +57,7 @@ def pad(Column source_strings,
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def zfill(Column source_strings,
           size_type width):
     """
@@ -73,6 +76,7 @@ def zfill(Column source_strings,
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def center(Column source_strings,
            size_type width,
            fill_char):
@@ -97,6 +101,7 @@ def center(Column source_strings,
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def ljust(Column source_strings,
           size_type width,
           fill_char):
@@ -120,6 +125,7 @@ def ljust(Column source_strings,
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def rjust(Column source_strings,
           size_type width,
           fill_char):
diff --git a/python/cudf/cudf/_lib/strings/repeat.pyx b/python/cudf/cudf/_lib/strings/repeat.pyx
index 49a46f418b1..4896fb74f41 100644
--- a/python/cudf/cudf/_lib/strings/repeat.pyx
+++ b/python/cudf/cudf/_lib/strings/repeat.pyx
@@ -1,8 +1,10 @@
-# Copyright (c) 2021, NVIDIA CORPORATION.
+# Copyright (c) 2021-2022, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
+from cudf.core.buffer import acquire_spill_lock
+
 from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.column.column_view cimport column_view
@@ -10,6 +12,7 @@ from cudf._lib.cpp.strings cimport repeat as cpp_repeat
 from cudf._lib.cpp.types cimport size_type
 
 
+@acquire_spill_lock()
 def repeat_scalar(Column source_strings,
                   size_type repeats):
     """
@@ -29,6 +32,7 @@ def repeat_scalar(Column source_strings,
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def repeat_sequence(Column source_strings,
                     Column repeats):
     """
diff --git a/python/cudf/cudf/_lib/strings/replace.pyx b/python/cudf/cudf/_lib/strings/replace.pyx
index 72d66d9a8e3..80c9ba95fd8 100644
--- a/python/cudf/cudf/_lib/strings/replace.pyx
+++ b/python/cudf/cudf/_lib/strings/replace.pyx
@@ -4,6 +4,8 @@ from libc.stdint cimport int32_t
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
+from cudf.core.buffer import acquire_spill_lock
+
 from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.column.column_view cimport column_view
@@ -16,6 +18,7 @@ from cudf._lib.cpp.types cimport size_type
 from cudf._lib.scalar cimport DeviceScalar
 
 
+@acquire_spill_lock()
 def slice_replace(Column source_strings,
                   size_type start,
                   size_type stop,
@@ -46,6 +49,7 @@ def slice_replace(Column source_strings,
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def insert(Column source_strings,
            size_type start,
            object py_repl):
@@ -74,6 +78,7 @@ def insert(Column source_strings,
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def replace(Column source_strings,
             object py_target,
             object py_repl,
@@ -107,6 +112,7 @@ def replace(Column source_strings,
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def replace_multi(Column source_strings,
                   Column target_strings,
                   Column repl_strings):
diff --git a/python/cudf/cudf/_lib/strings/replace_re.pyx b/python/cudf/cudf/_lib/strings/replace_re.pyx
index 20fb903c60c..73911538db2 100644
--- a/python/cudf/cudf/_lib/strings/replace_re.pyx
+++ b/python/cudf/cudf/_lib/strings/replace_re.pyx
@@ -1,10 +1,12 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
 from libcpp.utility cimport move
 from libcpp.vector cimport vector
 
+from cudf.core.buffer import acquire_spill_lock
+
 from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.column.column_view cimport column_view
@@ -17,6 +19,7 @@ from cudf._lib.cpp.types cimport size_type
 from cudf._lib.scalar cimport DeviceScalar
 
 
+@acquire_spill_lock()
 def replace_re(Column source_strings,
                object pattern,
                object py_repl,
@@ -48,6 +51,7 @@ def replace_re(Column source_strings,
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def replace_with_backrefs(
         Column source_strings,
         object pattern,
@@ -73,6 +77,7 @@ def replace_with_backrefs(
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def replace_multi_re(Column source_strings,
                      object patterns,
                      Column repl_strings):
diff --git a/python/cudf/cudf/_lib/strings/split/partition.pyx b/python/cudf/cudf/_lib/strings/split/partition.pyx
index b17ea4e608d..281d131372a 100644
--- a/python/cudf/cudf/_lib/strings/split/partition.pyx
+++ b/python/cudf/cudf/_lib/strings/split/partition.pyx
@@ -3,6 +3,8 @@
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
+from cudf.core.buffer import acquire_spill_lock
+
 from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.scalar.scalar cimport string_scalar
@@ -15,6 +17,7 @@ from cudf._lib.scalar cimport DeviceScalar
 from cudf._lib.utils cimport data_from_unique_ptr
 
 
+@acquire_spill_lock()
 def partition(Column source_strings,
               object py_delimiter):
     """
@@ -42,6 +45,7 @@ def partition(Column source_strings,
     )
 
 
+@acquire_spill_lock()
 def rpartition(Column source_strings,
                object py_delimiter):
     """
diff --git a/python/cudf/cudf/_lib/strings/split/split.pyx b/python/cudf/cudf/_lib/strings/split/split.pyx
index e96c911e83a..7a84cf75e37 100644
--- a/python/cudf/cudf/_lib/strings/split/split.pyx
+++ b/python/cudf/cudf/_lib/strings/split/split.pyx
@@ -4,6 +4,8 @@ from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
 from libcpp.utility cimport move
 
+from cudf.core.buffer import acquire_spill_lock
+
 from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.column.column_view cimport column_view
@@ -24,6 +26,7 @@ from cudf._lib.scalar cimport DeviceScalar
 from cudf._lib.utils cimport data_from_unique_ptr
 
 
+@acquire_spill_lock()
 def split(Column source_strings,
           object py_delimiter,
           size_type maxsplit):
@@ -54,6 +57,7 @@ def split(Column source_strings,
     )
 
 
+@acquire_spill_lock()
 def split_record(Column source_strings,
                  object py_delimiter,
                  size_type maxsplit):
@@ -83,6 +87,7 @@ def split_record(Column source_strings,
     )
 
 
+@acquire_spill_lock()
 def rsplit(Column source_strings,
            object py_delimiter,
            size_type maxsplit):
@@ -113,6 +118,7 @@ def rsplit(Column source_strings,
     )
 
 
+@acquire_spill_lock()
 def rsplit_record(Column source_strings,
                   object py_delimiter,
                   size_type maxsplit):
@@ -142,6 +148,7 @@ def rsplit_record(Column source_strings,
     )
 
 
+@acquire_spill_lock()
 def split_re(Column source_strings,
              object pattern,
              size_type maxsplit):
@@ -166,6 +173,7 @@ def split_re(Column source_strings,
     )
 
 
+@acquire_spill_lock()
 def rsplit_re(Column source_strings,
               object pattern,
               size_type maxsplit):
@@ -191,6 +199,7 @@ def rsplit_re(Column source_strings,
     )
 
 
+@acquire_spill_lock()
 def split_record_re(Column source_strings,
                     object pattern,
                     size_type maxsplit):
@@ -214,6 +223,7 @@ def split_record_re(Column source_strings,
     )
 
 
+@acquire_spill_lock()
 def rsplit_record_re(Column source_strings,
                      object pattern,
                      size_type maxsplit):
diff --git a/python/cudf/cudf/_lib/strings/strip.pyx b/python/cudf/cudf/_lib/strings/strip.pyx
index da3efe33786..2c53782d6ba 100644
--- a/python/cudf/cudf/_lib/strings/strip.pyx
+++ b/python/cudf/cudf/_lib/strings/strip.pyx
@@ -3,6 +3,8 @@
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
+from cudf.core.buffer import acquire_spill_lock
+
 from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.column.column_view cimport column_view
@@ -12,6 +14,7 @@ from cudf._lib.cpp.strings.strip cimport strip as cpp_strip
 from cudf._lib.scalar cimport DeviceScalar
 
 
+@acquire_spill_lock()
 def strip(Column source_strings,
           object py_repl):
     """
@@ -39,6 +42,7 @@ def strip(Column source_strings,
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def lstrip(Column source_strings,
            object py_repl):
     """
@@ -66,6 +70,7 @@ def lstrip(Column source_strings,
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def rstrip(Column source_strings,
            object py_repl):
     """
diff --git a/python/cudf/cudf/_lib/strings/substring.pyx b/python/cudf/cudf/_lib/strings/substring.pyx
index 761e9503aba..57bca09ee0e 100644
--- a/python/cudf/cudf/_lib/strings/substring.pyx
+++ b/python/cudf/cudf/_lib/strings/substring.pyx
@@ -1,16 +1,17 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
+
+import numpy as np
 
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
+from cudf.core.buffer import acquire_spill_lock
+
 from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.column.column_view cimport column_view
-from cudf._lib.cpp.types cimport size_type
-
-import numpy as np
-
 from cudf._lib.cpp.strings.substring cimport slice_strings as cpp_slice_strings
+from cudf._lib.cpp.types cimport size_type
 
 from cudf._lib.scalar import as_device_scalar
 
@@ -18,6 +19,7 @@ from cudf._lib.cpp.scalar.scalar cimport numeric_scalar
 from cudf._lib.scalar cimport DeviceScalar
 
 
+@acquire_spill_lock()
 def slice_strings(Column source_strings,
                   object start,
                   object end,
@@ -54,6 +56,7 @@ def slice_strings(Column source_strings,
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def slice_from(Column source_strings,
                Column starts,
                Column stops):
@@ -77,6 +80,7 @@ def slice_from(Column source_strings,
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def get(Column source_strings,
         object index):
     """
diff --git a/python/cudf/cudf/_lib/strings/translate.pyx b/python/cudf/cudf/_lib/strings/translate.pyx
index 7a5cf502ba3..262d479d914 100644
--- a/python/cudf/cudf/_lib/strings/translate.pyx
+++ b/python/cudf/cudf/_lib/strings/translate.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2020, NVIDIA CORPORATION.
+# Copyright (c) 2018-2022, NVIDIA CORPORATION.
 
 from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
@@ -6,6 +6,8 @@ from libcpp.pair cimport pair
 from libcpp.utility cimport move
 from libcpp.vector cimport vector
 
+from cudf.core.buffer import acquire_spill_lock
+
 from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.column.column_view cimport column_view
@@ -19,6 +21,7 @@ from cudf._lib.cpp.types cimport char_utf8
 from cudf._lib.scalar cimport DeviceScalar
 
 
+@acquire_spill_lock()
 def translate(Column source_strings,
               object mapping_table):
     """
@@ -51,6 +54,7 @@ def translate(Column source_strings,
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def filter_characters(Column source_strings,
                       object mapping_table,
                       bool keep,
diff --git a/python/cudf/cudf/_lib/strings/wrap.pyx b/python/cudf/cudf/_lib/strings/wrap.pyx
index 5ebc33f77ef..8b0c367e791 100644
--- a/python/cudf/cudf/_lib/strings/wrap.pyx
+++ b/python/cudf/cudf/_lib/strings/wrap.pyx
@@ -1,8 +1,10 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 
+from cudf.core.buffer import acquire_spill_lock
+
 from cudf._lib.column cimport Column
 from cudf._lib.cpp.column.column cimport column
 from cudf._lib.cpp.column.column_view cimport column_view
@@ -10,6 +12,7 @@ from cudf._lib.cpp.strings.wrap cimport wrap as cpp_wrap
 from cudf._lib.cpp.types cimport size_type
 
 
+@acquire_spill_lock()
 def wrap(Column source_strings,
          size_type width):
     """
diff --git a/python/cudf/cudf/_lib/transform.pyx b/python/cudf/cudf/_lib/transform.pyx
index b95bce0db58..3787f1405b7 100644
--- a/python/cudf/cudf/_lib/transform.pyx
+++ b/python/cudf/cudf/_lib/transform.pyx
@@ -5,7 +5,7 @@ from numba.np import numpy_support
 import cudf
 from cudf._lib.types import SUPPORTED_NUMPY_TO_LIBCUDF_TYPES
 from cudf.core._internals.expressions import parse_expression
-from cudf.core.buffer import as_buffer
+from cudf.core.buffer import acquire_spill_lock, as_buffer
 from cudf.utils import cudautils
 
 from cython.operator cimport dereference
@@ -34,6 +34,7 @@ from cudf._lib.utils cimport (
 )
 
 
+@acquire_spill_lock()
 def bools_to_mask(Column col):
     """
     Given an int8 (boolean) column, compress the data from booleans to bits and
@@ -88,6 +89,7 @@ def nans_to_nulls(Column input):
     return buffer
 
 
+@acquire_spill_lock()
 def transform(Column input, op):
     cdef column_view c_input = input.view()
     cdef string c_str
@@ -132,8 +134,10 @@ def table_encode(list source_columns):
     with nogil:
         c_result = move(libcudf_transform.encode(c_input))
 
-    return columns_from_unique_ptr(
-        move(c_result.first)), Column.from_unique_ptr(move(c_result.second))
+    return (
+        columns_from_unique_ptr(move(c_result.first)),
+        Column.from_unique_ptr(move(c_result.second))
+    )
 
 
 def one_hot_encode(Column input_column, Column categories):
@@ -146,7 +150,11 @@ def one_hot_encode(Column input_column, Column categories):
             libcudf_transform.one_hot_encode(c_view_input, c_view_categories)
         )
 
-    owner = Column.from_unique_ptr(move(c_result.first))
+    # Notice, the data pointer of `owner` has been exposed
+    # through `c_result.second` at this point.
+    owner = Column.from_unique_ptr(
+        move(c_result.first), data_ptr_exposed=True
+    )
 
     pylist_categories = categories.to_arrow().to_pylist()
     encodings, _ = data_from_table_view(
@@ -156,10 +164,10 @@ def one_hot_encode(Column input_column, Column categories):
             x if x is not None else 'null' for x in pylist_categories
         ]
     )
-
     return encodings
 
 
+@acquire_spill_lock()
 def compute_column(list columns, tuple column_names, expr: str):
     """Compute a new column by evaluating an expression on a set of columns.
 
diff --git a/python/cudf/cudf/_lib/transpose.pyx b/python/cudf/cudf/_lib/transpose.pyx
index b9eea6169bd..51e49b1f27a 100644
--- a/python/cudf/cudf/_lib/transpose.pyx
+++ b/python/cudf/cudf/_lib/transpose.pyx
@@ -20,7 +20,11 @@ def transpose(list source_columns):
     with nogil:
         c_result = move(cpp_transpose(c_input))
 
-    result_owner = Column.from_unique_ptr(move(c_result.first))
+    # Notice, the data pointer of `result_owner` has been exposed
+    # through `c_result.second` at this point.
+    result_owner = Column.from_unique_ptr(
+        move(c_result.first), data_ptr_exposed=True
+    )
     return columns_from_table_view(
         c_result.second,
         owners=[result_owner] * c_result.second.num_columns()
diff --git a/python/cudf/cudf/_lib/unary.pyx b/python/cudf/cudf/_lib/unary.pyx
index 52f0a804b2a..7ef4d00b9ff 100644
--- a/python/cudf/cudf/_lib/unary.pyx
+++ b/python/cudf/cudf/_lib/unary.pyx
@@ -3,6 +3,7 @@
 from enum import IntEnum
 
 from cudf.api.types import is_decimal_dtype
+from cudf.core.buffer import acquire_spill_lock
 
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
@@ -43,6 +44,7 @@ class UnaryOp(IntEnum):
     NOT = <underlying_type_t_unary_op> unary_operator.NOT
 
 
+@acquire_spill_lock()
 def unary_operation(Column input, object op):
     cdef column_view c_input = input.view()
     cdef unary_operator c_op = <unary_operator>(<underlying_type_t_unary_op>
@@ -60,6 +62,7 @@ def unary_operation(Column input, object op):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def is_null(Column input):
     cdef column_view c_input = input.view()
     cdef unique_ptr[column] c_result
@@ -70,6 +73,7 @@ def is_null(Column input):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def is_valid(Column input):
     cdef column_view c_input = input.view()
     cdef unique_ptr[column] c_result
@@ -80,6 +84,7 @@ def is_valid(Column input):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def cast(Column input, object dtype=np.float64):
     cdef column_view c_input = input.view()
     cdef data_type c_dtype = dtype_to_data_type(dtype)
@@ -95,6 +100,7 @@ def cast(Column input, object dtype=np.float64):
     return result
 
 
+@acquire_spill_lock()
 def is_nan(Column input):
     cdef column_view c_input = input.view()
     cdef unique_ptr[column] c_result
@@ -105,6 +111,7 @@ def is_nan(Column input):
     return Column.from_unique_ptr(move(c_result))
 
 
+@acquire_spill_lock()
 def is_non_nan(Column input):
     cdef column_view c_input = input.view()
     cdef unique_ptr[column] c_result
diff --git a/python/cudf/cudf/core/abc.py b/python/cudf/cudf/core/abc.py
index 1c8874a2abd..adf9fe39e4f 100644
--- a/python/cudf/cudf/core/abc.py
+++ b/python/cudf/cudf/core/abc.py
@@ -3,6 +3,8 @@
 
 import pickle
 
+import numpy
+
 import cudf
 
 
@@ -176,5 +178,9 @@ def host_deserialize(cls, header, frames):
 
     def __reduce_ex__(self, protocol):
         header, frames = self.host_serialize()
-        frames = [f.obj for f in frames]
+
+        # Since memoryviews are not picklable, we convert them to numpy
+        # arrays (zero-copy). This works seamlessly because host_deserialize
+        # converts the frames back into memoryviews.
+        frames = [numpy.asarray(f) for f in frames]
         return self.host_deserialize, (header, frames)
diff --git a/python/cudf/cudf/core/buffer/__init__.py b/python/cudf/cudf/core/buffer/__init__.py
index a73bc69ffb5..49f2c57b17f 100644
--- a/python/cudf/cudf/core/buffer/__init__.py
+++ b/python/cudf/cudf/core/buffer/__init__.py
@@ -1,4 +1,9 @@
 # Copyright (c) 2022, NVIDIA CORPORATION.
 
 from cudf.core.buffer.buffer import Buffer, cuda_array_interface_wrapper
-from cudf.core.buffer.utils import as_buffer
+from cudf.core.buffer.spillable_buffer import SpillableBuffer, SpillLock
+from cudf.core.buffer.utils import (
+    acquire_spill_lock,
+    as_buffer,
+    get_spill_lock,
+)
diff --git a/python/cudf/cudf/core/buffer/spill_manager.py b/python/cudf/cudf/core/buffer/spill_manager.py
new file mode 100644
index 00000000000..5ea1b90928b
--- /dev/null
+++ b/python/cudf/cudf/core/buffer/spill_manager.py
@@ -0,0 +1,297 @@
+# Copyright (c) 2022, NVIDIA CORPORATION.
+
+from __future__ import annotations
+
+import gc
+import io
+import threading
+import traceback
+import warnings
+import weakref
+from typing import List, Optional, Tuple
+
+import rmm.mr
+
+from cudf.core.buffer.spillable_buffer import SpillableBuffer
+from cudf.options import get_option
+from cudf.utils.string import format_bytes
+
+
+def get_traceback() -> str:
+    """Pretty print current traceback to a string"""
+    with io.StringIO() as f:
+        traceback.print_stack(file=f)
+        f.seek(0)
+        return f.read()
+
+
+def get_rmm_memory_resource_stack(
+    mr: rmm.mr.DeviceMemoryResource,
+) -> List[rmm.mr.DeviceMemoryResource]:
+    """Get the RMM resource stack
+
+    Parameters
+    ----------
+    mr : rmm.mr.DeviceMemoryResource
+        Top of the resource stack
+
+    Return
+    ------
+    list
+        List of RMM resources
+    """
+
+    if hasattr(mr, "upstream_mr"):
+        return [mr] + get_rmm_memory_resource_stack(mr.upstream_mr)
+    return [mr]
+
+
+class SpillManager:
+    """Manager of spillable buffers.
+
+    This class implements tracking of all known spillable buffers, on-demand
+    spilling of said buffers, and (optionally) maintains a memory usage limit.
+
+    When `spill_on_demand=True`, the manager registers an RMM out-of-memory
+    error handler, which will spill spillable buffers in order to free up
+    memory.
+
+    When `device_memory_limit` is set, the manager will try to keep the
+    device memory usage below the specified limit by continuously spilling
+    spillable buffers, which will introduce a modest overhead.
+
+    Parameters
+    ----------
+    spill_on_demand : bool
+        Enable spill on demand.
+    device_memory_limit: int, optional
+        If not None, this is the device memory limit in bytes that triggers
+        device to host spilling. The global manager sets this to the value
+        of `CUDF_SPILL_DEVICE_LIMIT` or None.
+    """
+
+    _buffers: weakref.WeakValueDictionary[int, SpillableBuffer]
+
+    def __init__(
+        self,
+        *,
+        spill_on_demand: bool = False,
+        device_memory_limit: int = None,
+    ) -> None:
+        self._lock = threading.Lock()
+        self._buffers = weakref.WeakValueDictionary()
+        self._id_counter = 0
+        self._spill_on_demand = spill_on_demand
+        self._device_memory_limit = device_memory_limit
+
+        if self._spill_on_demand:
+            # Set the RMM out-of-memory handle if not already set
+            mr = rmm.mr.get_current_device_resource()
+            if all(
+                not isinstance(m, rmm.mr.FailureCallbackResourceAdaptor)
+                for m in get_rmm_memory_resource_stack(mr)
+            ):
+                rmm.mr.set_current_device_resource(
+                    rmm.mr.FailureCallbackResourceAdaptor(
+                        mr, self._out_of_memory_handle
+                    )
+                )
+
+    def _out_of_memory_handle(self, nbytes: int, *, retry_once=True) -> bool:
+        """Try to handle an out-of-memory error by spilling
+
+        This can be used as the callback function to RMM's
+        `FailureCallbackResourceAdaptor`
+
+        Parameters
+        ----------
+        nbytes : int
+            Number of bytes to try to spill.
+        retry_once : bool, optional
+            If True, call `gc.collect()` and retry once.
+
+        Return
+        ------
+        bool
+            True if any buffers were freed otherwise False.
+
+        Warning
+        -------
+        In order to avoid deadlock, this function should not lock
+        already locked buffers.
+        """
+
+        # Let's try to spill device memory
+        spilled = self.spill_device_memory(nbytes=nbytes)
+
+        if spilled > 0:
+            return True  # Ask RMM to retry the allocation
+
+        if retry_once:
+            # Let's collect garbage and try one more time
+            gc.collect()
+            return self._out_of_memory_handle(nbytes, retry_once=False)
+
+        # TODO: write to log instead of stdout
+        print(
+            f"[WARNING] RMM allocation of {format_bytes(nbytes)} bytes "
+            "failed, spill-on-demand couldn't find any device memory to "
+            f"spill:\n{repr(self)}\ntraceback:\n{get_traceback()}"
+        )
+        return False  # Since we didn't find anything to spill, we give up
+
+    def add(self, buffer: SpillableBuffer) -> None:
+        """Add buffer to the set of managed buffers
+
+        The manager keeps a weak reference to the buffer
+
+        Parameters
+        ----------
+        buffer : SpillableBuffer
+            The buffer to manage
+        """
+        if buffer.size > 0 and not buffer.exposed:
+            with self._lock:
+                self._buffers[self._id_counter] = buffer
+                self._id_counter += 1
+        self.spill_to_device_limit()
+
+    def buffers(
+        self, order_by_access_time: bool = False
+    ) -> Tuple[SpillableBuffer, ...]:
+        """Get all managed buffers
+
+        Parameters
+        ----------
+        order_by_access_time : bool, optional
+            Order the buffer by access time (ascending order)
+
+        Return
+        ------
+        tuple
+            Tuple of buffers
+        """
+        with self._lock:
+            ret = tuple(self._buffers.values())
+        if order_by_access_time:
+            ret = tuple(sorted(ret, key=lambda b: b.last_accessed))
+        return ret
+
+    def spill_device_memory(self, nbytes: int) -> int:
+        """Try to spill device memory
+
+        This function is safe to call during spill-on-demand
+        since it does not lock buffers already locked.
+
+        Parameters
+        ----------
+        nbytes : int
+            Number of bytes to try to spill
+
+        Return
+        ------
+        int
+            Actual number of bytes spilled.
+        """
+        spilled = 0
+        for buf in self.buffers(order_by_access_time=True):
+            if buf.lock.acquire(blocking=False):
+                try:
+                    if not buf.is_spilled and buf.spillable:
+                        buf.spill(target="cpu")
+                        spilled += buf.size
+                        if spilled >= nbytes:
+                            break
+                finally:
+                    buf.lock.release()
+        return spilled
+
+    def spill_to_device_limit(self, device_limit: int = None) -> int:
+        """Spill until the unspilled bytes are below the device limit
+
+        Notice, by default this is a no-op.
+
+        Parameters
+        ----------
+        device_limit : int, optional
+            Limit in bytes. If None, the `device_memory_limit` given to the
+            manager is used. If that is None as well, the method does
+            nothing and returns 0.
+
+        Return
+        ------
+        int
+            The number of bytes spilled.
+        """
+        limit = (
+            self._device_memory_limit if device_limit is None else device_limit
+        )
+        if limit is None:
+            return 0
+        ret = 0
+        while True:
+            unspilled = sum(
+                buf.size for buf in self.buffers() if not buf.is_spilled
+            )
+            if unspilled < limit:
+                break
+            nbytes = self.spill_device_memory(nbytes=unspilled - limit)
+            if nbytes == 0:
+                break  # No more to spill
+            ret += nbytes
+        return ret
+
+    def __repr__(self) -> str:
+        # Take one snapshot so every total below describes the same buffers
+        buffers = self.buffers()
+        spilled = sum(buf.size for buf in buffers if buf.is_spilled)
+        unspilled = sum(buf.size for buf in buffers if not buf.is_spilled)
+        unspillable = 0
+        for buf in buffers:
+            if not (buf.is_spilled or buf.spillable):
+                unspillable += buf.size
+        unspillable_ratio = unspillable / unspilled if unspilled else 0
+
+        return (
+            f"<SpillManager spill_on_demand={self._spill_on_demand} "
+            f"device_memory_limit={self._device_memory_limit} | "
+            f"{format_bytes(spilled)} spilled | "
+            f"{format_bytes(unspilled)} ({unspillable_ratio:.0%}) "
+            f"unspilled (unspillable)>"
+        )
+
+
+# The global manager has three states:
+#   - Uninitialized
+#   - Initialized to None (spilling disabled)
+#   - Initialized to a SpillManager instance (spilling enabled)
+_global_manager_uninitialized: bool = True
+_global_manager: Optional[SpillManager] = None
+
+
+def set_global_manager(manager: Optional[SpillManager]) -> None:
+    """Set the global manager, which if None disables spilling"""
+
+    global _global_manager, _global_manager_uninitialized
+    if _global_manager is not None:
+        gc.collect()
+        buffers = _global_manager.buffers()
+        if len(buffers) > 0:
+            warnings.warn(f"overwriting non-empty manager: {buffers}")
+
+    _global_manager = manager
+    _global_manager_uninitialized = False
+
+
+def get_global_manager() -> Optional[SpillManager]:
+    """Get the global manager or None if spilling is disabled"""
+    global _global_manager_uninitialized
+    if _global_manager_uninitialized:
+        manager = None
+        if get_option("spill"):
+            manager = SpillManager(
+                spill_on_demand=get_option("spill_on_demand"),
+                device_memory_limit=get_option("spill_device_limit"),
+            )
+        set_global_manager(manager)
+    return _global_manager
diff --git a/python/cudf/cudf/core/buffer/spillable_buffer.py b/python/cudf/cudf/core/buffer/spillable_buffer.py
new file mode 100644
index 00000000000..c42216be279
--- /dev/null
+++ b/python/cudf/cudf/core/buffer/spillable_buffer.py
@@ -0,0 +1,474 @@
+# Copyright (c) 2022, NVIDIA CORPORATION.
+
+from __future__ import annotations
+
+import collections.abc
+import pickle
+import time
+import weakref
+from threading import RLock
+from typing import TYPE_CHECKING, Any, Dict, List, Tuple, Type, TypeVar
+
+import numpy
+
+import rmm
+
+from cudf.core.buffer.buffer import Buffer, cuda_array_interface_wrapper
+from cudf.utils.string import format_bytes
+
+if TYPE_CHECKING:
+    from cudf.core.buffer.spill_manager import SpillManager
+
+
+T = TypeVar("T", bound="SpillableBuffer")
+
+
+class SpillLock:
+    pass
+
+
+class DelayedPointerTuple(collections.abc.Sequence):
+    """
+    A delayed version of the "data" field in __cuda_array_interface__.
+
+    The idea is to delay the access to `Buffer.ptr` until the user
+    actually accesses the data pointer.
+
+    For instance, in many cases __cuda_array_interface__ is accessed
+    only to determine whether an object is a CUDA object or not.
+
+    TODO: this doesn't support libraries such as PyTorch that declare
+    the tuple of __cuda_array_interface__["data"] in Cython. In such
+    cases, Cython will raise an error because DelayedPointerTuple
+    isn't a "real" tuple.
+    """
+
+    def __init__(self, buffer) -> None:
+        self._buf = buffer
+
+    def __len__(self):
+        return 2
+
+    def __getitem__(self, i):
+        if i == 0:
+            return self._buf.ptr
+        elif i == 1:
+            return False
+        raise IndexError("tuple index out of range")
+
+
+class SpillableBuffer(Buffer):
+    """A spillable buffer that implements DeviceBufferLike.
+
+    This buffer supports spilling the represented data to host memory.
+    Spilling can be done manually by calling `.spill(target="cpu")` but
+    usually the associated spilling manager triggers spilling based on current
+    device memory usage see `cudf.core.buffer.spill_manager.SpillManager`.
+    Unspill is triggered automatically when accessing the data of the buffer.
+
+    The buffer might not be spillable, which is based on the "expose" status
+    of the buffer. We say that the buffer has been exposed if the device
+    pointer (integer or void*) has been accessed outside of SpillableBuffer.
+    In this case, we cannot invalidate the device pointer by moving the data
+    to host.
+
+    A buffer can be exposed permanently at creation or by accessing the `.ptr`
+    property. To avoid this, one can use `.get_ptr()` instead, which supports
+    exposing the buffer temporarily.
+
+    Use the factory function `as_buffer` to create a SpillableBuffer instance.
+    """
+
+    lock: RLock
+    _spill_locks: weakref.WeakSet
+    _last_accessed: float
+    _ptr_desc: Dict[str, Any]
+    _exposed: bool
+    _manager: SpillManager
+
+    def _finalize_init(self, ptr_desc: Dict[str, Any], exposed: bool) -> None:
+        """Finish initialization of the spillable buffer
+
+        This implements the common initialization that `_from_device_memory`
+        and `_from_host_memory` are missing.
+
+        Parameters
+        ----------
+        ptr_desc : dict
+            Description of the memory.
+        exposed : bool, optional
+            Mark the buffer as permanently exposed (unspillable).
+        """
+
+        from cudf.core.buffer.spill_manager import get_global_manager
+
+        self.lock = RLock()
+        self._spill_locks = weakref.WeakSet()
+        self._last_accessed = time.monotonic()
+        self._ptr_desc = ptr_desc
+        self._exposed = exposed
+        manager = get_global_manager()
+        if manager is None:
+            raise ValueError(
+                f"cannot create {self.__class__} without "
+                "a global spill manager"
+            )
+
+        self._manager = manager
+        self._manager.add(self)
+
+    @classmethod
+    def _from_device_memory(
+        cls: Type[T], data: Any, *, exposed: bool = False
+    ) -> T:
+        """Create a spillable buffer from device memory.
+
+        No data is being copied.
+
+        Parameters
+        ----------
+        data : device-buffer-like
+            An object implementing the CUDA Array Interface.
+        exposed : bool, optional
+            Mark the buffer as permanently exposed (unspillable).
+
+        Returns
+        -------
+        SpillableBuffer
+            Buffer representing the same device memory as `data`
+        """
+        ret = super()._from_device_memory(data)
+        ret._finalize_init(ptr_desc={"type": "gpu"}, exposed=exposed)
+        return ret
+
+    @classmethod
+    def _from_host_memory(cls: Type[T], data: Any) -> T:
+        """Create a spillable buffer from host memory.
+
+        Data must implement `__array_interface__`, the buffer protocol, and/or
+        be convertible to a buffer object using `numpy.array()`
+
+        The new buffer is marked as spilled to host memory already.
+
+        Raises ValueError if array isn't C-contiguous.
+
+        Parameters
+        ----------
+        data : Any
+            An object that represents host memory.
+
+        Returns
+        -------
+        SpillableBuffer
+            Buffer representing a copy of `data`.
+        """
+
+        # Convert to a memoryview using numpy array, this will not copy data
+        # in most cases.
+        data = memoryview(numpy.array(data, copy=False, subok=True))
+        if not data.c_contiguous:
+            raise ValueError("Buffer data must be C-contiguous")
+        data = data.cast("B")  # Make sure itemsize==1
+
+        # Create an already spilled buffer
+        ret = cls.__new__(cls)
+        ret._owner = None
+        ret._ptr = 0
+        ret._size = data.nbytes
+        ret._finalize_init(
+            ptr_desc={"type": "cpu", "memoryview": data}, exposed=False
+        )
+        return ret
+
+    @property
+    def is_spilled(self) -> bool:
+        return self._ptr_desc["type"] != "gpu"
+
+    def spill(self, target: str = "cpu") -> None:
+        """Spill or un-spill this buffer in-place
+
+        Parameters
+        ----------
+        target : str
+            The target of the spilling.
+        """
+
+        with self.lock:
+            ptr_type = self._ptr_desc["type"]
+            if ptr_type == target:
+                return
+
+            if not self.spillable:
+                raise ValueError(
+                    f"Cannot in-place move an unspillable buffer: {self}"
+                )
+
+            if (ptr_type, target) == ("gpu", "cpu"):
+                host_mem = memoryview(bytearray(self.size))
+                rmm._lib.device_buffer.copy_ptr_to_host(self._ptr, host_mem)
+                self._ptr_desc["memoryview"] = host_mem
+                self._ptr = 0
+                self._owner = None
+            elif (ptr_type, target) == ("cpu", "gpu"):
+                # Notice, this operation is prone to deadlock because the RMM
+                # allocation might trigger spilling-on-demand which in turn
+                # trigger a new call to this buffer's `spill()`.
+                # Therefore, it is important that spilling-on-demand doesn't
+                # try to unspill an already locked buffer!
+                dev_mem = rmm.DeviceBuffer.to_device(
+                    self._ptr_desc.pop("memoryview")
+                )
+                self._ptr = dev_mem.ptr
+                self._owner = dev_mem
+                assert self._size == dev_mem.size
+            else:
+                # TODO: support moving to disk
+                raise ValueError(f"Unknown target: {target}")
+            self._ptr_desc["type"] = target
+
+    @property
+    def ptr(self) -> int:
+        """Access the memory directly
+
+        Notice, this will mark the buffer as "exposed" and make
+        it unspillable permanently.
+
+        Consider using `.get_ptr()` instead.
+        """
+
+        self._manager.spill_to_device_limit()
+        with self.lock:
+            self.spill(target="gpu")
+            self._exposed = True
+            self._last_accessed = time.monotonic()
+            return self._ptr
+
+    def spill_lock(self, spill_lock: SpillLock) -> None:
+        """Spill lock the buffer
+
+        Mark the buffer as unspillable while `spill_lock` is alive,
+        which is tracked by monitoring a weakref to `spill_lock`.
+
+        Parameters
+        ----------
+        spill_lock : SpillLock
+            The object that defines the scope of the lock.
+        """
+
+        if spill_lock is None:
+            spill_lock = SpillLock()
+        with self.lock:
+            self.spill(target="gpu")
+            self._spill_locks.add(spill_lock)
+
+    def get_ptr(self, spill_lock: SpillLock = None) -> int:
+        """Get a device pointer to the memory of the buffer.
+
+        If spill_lock is not None, a reference to this buffer is added
+        to spill_lock, which disable spilling of this buffer while
+        spill_lock is alive.
+
+        Parameters
+        ----------
+        spill_lock : SpillLock, optional
+            Adding a reference of this buffer to the spill lock.
+
+        Return
+        ------
+        int
+            The device pointer as an integer
+        """
+
+        if spill_lock is None:
+            return self.ptr  # expose the buffer permanently
+
+        self.spill_lock(spill_lock)
+        self._last_accessed = time.monotonic()
+        return self._ptr
+
+    @property
+    def owner(self) -> Any:
+        return self._owner
+
+    @property
+    def exposed(self) -> bool:
+        return self._exposed
+
+    @property
+    def spillable(self) -> bool:
+        return not self._exposed and len(self._spill_locks) == 0
+
+    @property
+    def size(self) -> int:
+        return self._size
+
+    @property
+    def nbytes(self) -> int:
+        return self._size
+
+    @property
+    def last_accessed(self) -> float:
+        return self._last_accessed
+
+    @property
+    def __cuda_array_interface__(self) -> dict:
+        return {
+            "data": DelayedPointerTuple(self),
+            "shape": (self.size,),
+            "strides": None,
+            "typestr": "|u1",
+            "version": 0,
+        }
+
+    def memoryview(self, *, offset: int = 0, size: int = None) -> memoryview:
+        size = self._size if size is None else size
+        with self.lock:
+            if self.spillable:
+                self.spill(target="cpu")
+                return self._ptr_desc["memoryview"][offset : offset + size]
+            else:
+                assert self._ptr_desc["type"] == "gpu"
+                ret = memoryview(bytearray(size))
+                rmm._lib.device_buffer.copy_ptr_to_host(
+                    self._ptr + offset, ret
+                )
+                return ret
+
+    def _getitem(self, offset: int, size: int) -> Buffer:
+        return SpillableBufferSlice(base=self, offset=offset, size=size)
+
+    def serialize(self) -> Tuple[dict, list]:
+        """Serialize the Buffer
+
+        Normally, we would use `[self]` as the frames. This would work but
+        also mean that `self` becomes exposed permanently if the frames are
+        later accessed through `__cuda_array_interface__`, which is exactly
+        what libraries like Dask+UCX would do when communicating!
+
+        The sound solution is to modify Dask et al. so that they access the
+        frames through `.get_ptr()` and hold on to the `spill_lock` until
+        the frame has been transferred. However, until this adaptation we
+        use a hack where the frame is a `Buffer` with a `spill_lock` as the
+        owner, which makes `self` unspillable while the frame is alive but
+        doesn't expose `self` when `__cuda_array_interface__` is accessed.
+
+        Warning, this hack means that the returned frame must be copied before
+        given to `.deserialize()`, otherwise we would have a `Buffer` pointing
+        to memory already owned by an existing `SpillableBuffer`.
+        """
+        header: Dict[Any, Any]
+        frames: List[Buffer | memoryview]
+        with self.lock:
+            header = {}
+            header["type-serialized"] = pickle.dumps(self.__class__)
+            header["frame_count"] = 1
+            if self.is_spilled:
+                frames = [self.memoryview()]
+            else:
+                # TODO: Use `frames=[self]` instead of this hack, see doc above
+                spill_lock = SpillLock()
+                ptr = self.get_ptr(spill_lock=spill_lock)
+                frames = [
+                    Buffer._from_device_memory(
+                        cuda_array_interface_wrapper(
+                            ptr=ptr,
+                            size=self.size,
+                            owner=(self._owner, spill_lock),
+                        )
+                    )
+                ]
+            return header, frames
+
+    def __repr__(self) -> str:
+        if self._ptr_desc["type"] != "gpu":
+            ptr_info = str(self._ptr_desc)
+        else:
+            ptr_info = str(hex(self._ptr))
+        return (
+            f"<SpillableBuffer size={format_bytes(self._size)} "
+            f"spillable={self.spillable} exposed={self.exposed} "
+            f"num-spill-locks={len(self._spill_locks)} "
+            f"ptr={ptr_info} owner={repr(self._owner)}>"
+        )
+
+
+class SpillableBufferSlice(SpillableBuffer):
+    """A slice of a spillable buffer
+
+    This buffer applies the slicing and then delegates all
+    operations to its base buffer.
+
+    Parameters
+    ----------
+    base : SpillableBuffer
+        The base of the view
+    offset : int
+        Memory offset into the base buffer
+    size : int
+        Size of the view (in bytes)
+    """
+
+    def __init__(self, base: SpillableBuffer, offset: int, size: int) -> None:
+        if size < 0:
+            raise ValueError("size cannot be negative")
+        if offset < 0:
+            raise ValueError("offset cannot be negative")
+        if offset + size > base.size:
+            raise ValueError(
+                "offset+size cannot be greater than the size of base"
+            )
+        self._base = base
+        self._offset = offset
+        self._size = size
+        self._owner = base
+        self.lock = base.lock
+
+    @property
+    def ptr(self) -> int:
+        return self._base.ptr + self._offset
+
+    def get_ptr(self, spill_lock: SpillLock = None) -> int:
+        return self._base.get_ptr(spill_lock=spill_lock) + self._offset
+
+    def _getitem(self, offset: int, size: int) -> Buffer:
+        return SpillableBufferSlice(
+            base=self._base, offset=offset + self._offset, size=size
+        )
+
+    @classmethod
+    def deserialize(cls, header: dict, frames: list):
+        # TODO: because of the hack in `SpillableBuffer.serialize()` where
+        # frames are of type `Buffer`, we always deserialize as if they are
+        # `SpillableBuffer`. In the future, we should be able to
+        # deserialize into `SpillableBufferSlice` when the frames haven't been
+        # copied.
+        return SpillableBuffer.deserialize(header, frames)
+
+    def memoryview(self, *, offset: int = 0, size: int = None) -> memoryview:
+        size = self._size if size is None else size
+        return self._base.memoryview(offset=self._offset + offset, size=size)
+
+    def __repr__(self) -> str:
+        """Describe the slice and its base as `<SpillableBufferSlice ...>`"""
+        size, offset = format_bytes(self._size), format_bytes(self._offset)
+        head = f"<SpillableBufferSlice size={size} offset={offset}"
+        return f"{head} of {self._base}>"
+
+    # The rest of the methods delegate to the base buffer.
+    def spill(self, target: str = "cpu") -> None:
+        return self._base.spill(target=target)
+
+    @property
+    def is_spilled(self) -> bool:
+        return self._base.is_spilled
+
+    @property
+    def exposed(self) -> bool:
+        return self._base.exposed
+
+    @property
+    def spillable(self) -> bool:
+        return self._base.spillable
+
+    def spill_lock(self, spill_lock: SpillLock) -> None:
+        self._base.spill_lock(spill_lock=spill_lock)
diff --git a/python/cudf/cudf/core/buffer/utils.py b/python/cudf/cudf/core/buffer/utils.py
index 5e017c4bc92..71d8ce9853c 100644
--- a/python/cudf/cudf/core/buffer/utils.py
+++ b/python/cudf/cudf/core/buffer/utils.py
@@ -2,9 +2,13 @@
 
 from __future__ import annotations
 
-from typing import Any, Union
+import threading
+from contextlib import ContextDecorator
+from typing import Any, Dict, Optional, Tuple, Union
 
 from cudf.core.buffer.buffer import Buffer, cuda_array_interface_wrapper
+from cudf.core.buffer.spill_manager import get_global_manager
+from cudf.core.buffer.spillable_buffer import SpillableBuffer, SpillLock
 
 
 def as_buffer(
@@ -12,6 +16,7 @@ def as_buffer(
     *,
     size: int = None,
     owner: object = None,
+    exposed: bool = False,
 ) -> Buffer:
     """Factory function to wrap `data` in a Buffer object.
 
@@ -37,6 +42,10 @@ def as_buffer(
     owner : object, optional
         Python object to which the lifetime of the memory allocation is tied.
         A reference to this object is kept in the returned Buffer.
+    exposed : bool, optional
+        Mark the buffer as permanently exposed (unspillable). This is ignored
+        unless spilling is enabled and the data represents device memory, see
+        SpillableBuffer.
 
     Return
     ------
@@ -62,6 +71,66 @@ def as_buffer(
             "`data` is a buffer-like or array-like object"
         )
 
+    if get_global_manager() is not None:
+        if hasattr(data, "__cuda_array_interface__"):
+            return SpillableBuffer._from_device_memory(data, exposed=exposed)
+        if exposed:
+            raise ValueError("cannot created exposed host memory")
+        return SpillableBuffer._from_host_memory(data)
+
     if hasattr(data, "__cuda_array_interface__"):
         return Buffer._from_device_memory(data)
     return Buffer._from_host_memory(data)
+
+
+_thread_spill_locks: Dict[int, Tuple[Optional[SpillLock], int]] = {}
+
+
+def _push_thread_spill_lock() -> None:
+    _id = threading.get_ident()
+    spill_lock, count = _thread_spill_locks.get(_id, (None, 0))
+    if spill_lock is None:
+        spill_lock = SpillLock()
+    _thread_spill_locks[_id] = (spill_lock, count + 1)
+
+
+def _pop_thread_spill_lock() -> None:
+    _id = threading.get_ident()
+    spill_lock, count = _thread_spill_locks[_id]
+    if count == 1:
+        spill_lock = None
+    _thread_spill_locks[_id] = (spill_lock, count - 1)
+
+
+class acquire_spill_lock(ContextDecorator):
+    """Decorator and context to set spill lock automatically.
+
+    All calls to `get_spill_lock()` within the decorated function or context
+    will return a spill lock with a lifetime bound to the function or context.
+
+    Developer Notes
+    ---------------
+    We use the global variable `_thread_spill_locks` to track the global spill
+    lock state. To support concurrency, each thread tracks its own state by
+    pushing and popping from `_thread_spill_locks` using its thread ID.
+    """
+
+    def __enter__(self) -> Optional[SpillLock]:
+        _push_thread_spill_lock()
+        return get_spill_lock()
+
+    def __exit__(self, *exc):
+        _pop_thread_spill_lock()
+
+
+def get_spill_lock() -> Union[SpillLock, None]:
+    """Return a spill lock within the context of `acquire_spill_lock` or None
+
+    Returns None, if spilling is disabled.
+    """
+
+    if get_global_manager() is None:
+        return None
+    _id = threading.get_ident()
+    spill_lock, _ = _thread_spill_locks.get(_id, (None, 0))
+    return spill_lock
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 59851a1c11b..a51703ae57e 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -1765,7 +1765,7 @@ def as_column(
         ):
             arbitrary = cupy.ascontiguousarray(arbitrary)
 
-        data = as_buffer(arbitrary)
+        data = as_buffer(arbitrary, exposed=True)
         col = build_column(data, dtype=current_dtype, mask=mask)
 
         if dtype is not None:
@@ -2222,7 +2222,7 @@ def _mask_from_cuda_array_interface_desc(obj) -> Union[Buffer, None]:
         typecode = typestr[1]
         if typecode == "t":
             mask_size = bitmask_allocation_size_bytes(nelem)
-            mask = as_buffer(data=ptr, size=mask_size, owner=obj)
+            mask = as_buffer(data=ptr, size=mask_size, owner=obj, exposed=True)
         elif typecode == "b":
             col = as_column(mask)
             mask = bools_to_mask(col)
diff --git a/python/cudf/cudf/core/df_protocol.py b/python/cudf/cudf/core/df_protocol.py
index b29fc41e5b4..b38d3048ed7 100644
--- a/python/cudf/cudf/core/df_protocol.py
+++ b/python/cudf/cudf/core/df_protocol.py
@@ -721,7 +721,9 @@ def _protocol_to_cudf_column_numeric(
     _dbuffer, _ddtype = buffers["data"]
     _check_buffer_is_on_gpu(_dbuffer)
     cudfcol_num = build_column(
-        as_buffer(data=_dbuffer.ptr, size=_dbuffer.bufsize, owner=None),
+        as_buffer(
+            data=_dbuffer.ptr, size=_dbuffer.bufsize, owner=None, exposed=True
+        ),
         protocol_dtype_to_cupy_dtype(_ddtype),
     )
     return _set_missing_values(col, cudfcol_num), buffers
@@ -751,7 +753,11 @@ def _set_missing_values(
     valid_mask = protocol_col.get_buffers()["validity"]
     if valid_mask is not None:
         bitmask = cp.asarray(
-            as_buffer(data=valid_mask[0].ptr, size=valid_mask[0].bufsize),
+            as_buffer(
+                data=valid_mask[0].ptr,
+                size=valid_mask[0].bufsize,
+                exposed=True,
+            ),
             cp.bool8,
         )
         cudf_col[~bitmask] = None
@@ -790,7 +796,9 @@ def _protocol_to_cudf_column_categorical(
     _check_buffer_is_on_gpu(codes_buffer)
     cdtype = protocol_dtype_to_cupy_dtype(codes_dtype)
     codes = build_column(
-        as_buffer(data=codes_buffer.ptr, size=codes_buffer.bufsize),
+        as_buffer(
+            data=codes_buffer.ptr, size=codes_buffer.bufsize, exposed=True
+        ),
         cdtype,
     )
 
@@ -822,7 +830,9 @@ def _protocol_to_cudf_column_string(
     data_buffer, data_dtype = buffers["data"]
     _check_buffer_is_on_gpu(data_buffer)
     encoded_string = build_column(
-        as_buffer(data=data_buffer.ptr, size=data_buffer.bufsize),
+        as_buffer(
+            data=data_buffer.ptr, size=data_buffer.bufsize, exposed=True
+        ),
         protocol_dtype_to_cupy_dtype(data_dtype),
     )
 
@@ -832,7 +842,9 @@ def _protocol_to_cudf_column_string(
     offset_buffer, offset_dtype = buffers["offsets"]
     _check_buffer_is_on_gpu(offset_buffer)
     offsets = build_column(
-        as_buffer(data=offset_buffer.ptr, size=offset_buffer.bufsize),
+        as_buffer(
+            data=offset_buffer.ptr, size=offset_buffer.bufsize, exposed=True
+        ),
         protocol_dtype_to_cupy_dtype(offset_dtype),
     )
 
diff --git a/python/cudf/cudf/options.py b/python/cudf/cudf/options.py
index 7f6a6f10e25..4a0a0437e00 100644
--- a/python/cudf/cudf/options.py
+++ b/python/cudf/cudf/options.py
@@ -1,5 +1,6 @@
 # Copyright (c) 2022, NVIDIA CORPORATION.
 
+import os
 import textwrap
 from collections.abc import Container
 from dataclasses import dataclass
@@ -17,6 +18,26 @@ class Option:
 _OPTIONS: Dict[str, Option] = {}
 
 
+def _env_get_int(name, default):
+    try:
+        return int(os.getenv(name, default))
+    except (ValueError, TypeError):
+        return default
+
+
+def _env_get_bool(name, default):
+    env = os.getenv(name)
+    if env is None:
+        return default
+    as_a_int = _env_get_int(name, None)
+    env = env.lower().strip()
+    if env == "true" or env == "on" or as_a_int:
+        return True
+    if env == "false" or env == "off" or as_a_int == 0:
+        return False
+    return default
+
+
 def _register_option(
     name: str, default_value: Any, description: str, validator: Callable
 ):
@@ -129,6 +150,16 @@ def _validator(val):
     return _validator
 
 
+def _integer_and_none_validator(val):
+    """Accept None or any value `int()` can convert, else raise ValueError."""
+    try:
+        if val is not None:
+            int(val)
+    except (ValueError, TypeError):
+        msg = f"{val} is not a valid option. Must be an integer or None."
+        raise ValueError(msg)
+
+
 _register_option(
     "default_integer_bitwidth",
     None,
@@ -163,3 +194,43 @@ def _validator(val):
     ),
     _make_contains_validator([None, 32, 64]),
 )
+
+
+_register_option(
+    "spill",
+    _env_get_bool("CUDF_SPILL", False),
+    textwrap.dedent(
+        """
+        Enables spilling.
+        \tValid values are True or False. Default is False.
+        """
+    ),
+    _make_contains_validator([False, True]),
+)
+
+_register_option(
+    "spill_on_demand",
+    _env_get_bool("CUDF_SPILL_ON_DEMAND", True),
+    textwrap.dedent(
+        """
+        Enables spilling on demand using an RMM out-of-memory error handler.
+        This has no effect if spilling is disabled, see the "spill" option.
+        \tValid values are True or False. Default is True.
+        """
+    ),
+    _make_contains_validator([False, True]),
+)
+
+_register_option(
+    "spill_device_limit",
+    _env_get_int("CUDF_SPILL_DEVICE_LIMIT", None),
+    textwrap.dedent(
+        """
+        Enforce a device memory limit in bytes.
+        This has no effect if spilling is disabled, see the "spill" option.
+        \tValid values are any positive integer or None (disabled).
+        \tDefault is None.
+        """
+    ),
+    _integer_and_none_validator,
+)
diff --git a/python/cudf/cudf/testing/_utils.py b/python/cudf/cudf/testing/_utils.py
index 259257c257f..5465462d7c2 100644
--- a/python/cudf/cudf/testing/_utils.py
+++ b/python/cudf/cudf/testing/_utils.py
@@ -352,8 +352,12 @@ def assert_column_memory_eq(
     children to the same constraints. Also fails check if the number of
     children mismatches at any level.
     """
-    assert lhs.base_data_ptr == rhs.base_data_ptr
-    assert lhs.base_mask_ptr == rhs.base_mask_ptr
+
+    def get_ptr(x) -> int:
+        return x.ptr if x else 0
+
+    assert get_ptr(lhs.base_data) == get_ptr(rhs.base_data)
+    assert get_ptr(lhs.base_mask) == get_ptr(rhs.base_mask)
     assert lhs.base_size == rhs.base_size
     assert lhs.offset == rhs.offset
     assert lhs.size == rhs.size
diff --git a/python/cudf/cudf/tests/conftest.py b/python/cudf/cudf/tests/conftest.py
index 258b628305d..30d8f1c8422 100644
--- a/python/cudf/cudf/tests/conftest.py
+++ b/python/cudf/cudf/tests/conftest.py
@@ -158,3 +158,21 @@ def default_float_bitwidth(request):
     cudf.set_option("default_float_bitwidth", request.param)
     yield request.param
     cudf.set_option("default_float_bitwidth", old_default)
+
+
+@pytest.hookimpl(tryfirst=True, hookwrapper=True)
+def pytest_runtest_makereport(item, call):
+    """Hook to make result information available in fixtures
+
+    This makes it possible for a pytest.fixture to access the test reports
+    through `request.node.report`, a dict keyed by phase (`rep.when`).
+    See the `manager` fixture in `test_spilling.py` for an example.
+
+    Pytest doc: <https://docs.pytest.org/en/latest/example/simple.html>
+    """
+    outcome = yield
+    rep = outcome.get_result()
+
+    # Accumulate the report of each phase ("setup", "call", "teardown") in
+    # `item.report` instead of replacing the reports of earlier phases.
+    item.report = {**getattr(item, "report", {}), rep.when: rep}
diff --git a/python/cudf/cudf/tests/pytest.ini b/python/cudf/cudf/tests/pytest.ini
new file mode 100644
index 00000000000..7adbdb72d72
--- /dev/null
+++ b/python/cudf/cudf/tests/pytest.ini
@@ -0,0 +1,5 @@
+# Copyright (c) 2022, NVIDIA CORPORATION.
+
+[pytest]
+markers =
+    spilling: mark test as a good candidate to run with `CUDF_SPILL=ON`
diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py
index 8337084be72..6b720c3ad5c 100644
--- a/python/cudf/cudf/tests/test_binops.py
+++ b/python/cudf/cudf/tests/test_binops.py
@@ -47,6 +47,8 @@
     operator.ge,
 ]
 
+pytestmark = pytest.mark.spilling
+
 
 @pytest.mark.parametrize("obj_class", ["Series", "Index"])
 @pytest.mark.parametrize("binop", _binops)
diff --git a/python/cudf/cudf/tests/test_buffer.py b/python/cudf/cudf/tests/test_buffer.py
index 5ed5750f29b..df7152d53a6 100644
--- a/python/cudf/cudf/tests/test_buffer.py
+++ b/python/cudf/cudf/tests/test_buffer.py
@@ -5,6 +5,8 @@
 
 from cudf.core.buffer import Buffer, as_buffer
 
+pytestmark = pytest.mark.spilling
+
 arr_len = 10
 
 
@@ -48,15 +50,21 @@ def test_buffer_from_cuda_iface_dtype(data, dtype):
 
 def test_buffer_creation_from_any():
     ary = cp.arange(arr_len)
-    b = as_buffer(ary)
+    b = as_buffer(ary, exposed=True)
     assert isinstance(b, Buffer)
-    assert ary.__cuda_array_interface__["data"][0] == b.ptr
+    assert ary.data.ptr == b.ptr
     assert ary.nbytes == b.size
 
     with pytest.raises(
         ValueError, match="size must be specified when `data` is an integer"
     ):
-        as_buffer(42)
+        as_buffer(ary.data.ptr)
+
+    b = as_buffer(ary.data.ptr, size=ary.nbytes, owner=ary, exposed=True)
+    assert isinstance(b, Buffer)
+    assert ary.data.ptr == b.ptr
+    assert ary.nbytes == b.size
+    assert b.owner.owner is ary
 
 
 @pytest.mark.parametrize(
diff --git a/python/cudf/cudf/tests/test_cuda_array_interface.py b/python/cudf/cudf/tests/test_cuda_array_interface.py
index 9b9709b52c3..e81f4ec795a 100644
--- a/python/cudf/cudf/tests/test_cuda_array_interface.py
+++ b/python/cudf/cudf/tests/test_cuda_array_interface.py
@@ -10,6 +10,7 @@
 from numba import cuda
 
 import cudf
+from cudf.core.buffer.spill_manager import get_global_manager
 from cudf.testing._utils import DATETIME_TYPES, NUMERIC_TYPES, assert_eq
 
 
@@ -169,6 +170,13 @@ def test_column_from_ephemeral_cupy_try_lose_reference():
     assert_eq(pd.Series([1, 2, 3]), a.to_pandas())
 
 
+@pytest.mark.xfail(
+    get_global_manager() is not None,
+    reason=(
+        "spilling doesn't support PyTorch, see "
+        "`cudf.core.buffer.spillable_buffer.DelayedPointerTuple`"
+    ),
+)
 def test_cuda_array_interface_pytorch():
     torch = pytest.importorskip("torch", minversion="1.6.0")
     if not torch.cuda.is_available():
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 4ec770e0d6b..105f86df22e 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -40,6 +40,8 @@
     gen_rand,
 )
 
+pytestmark = pytest.mark.spilling
+
 
 def test_init_via_list_of_tuples():
     data = [
diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py
index b00e31115c9..dd1f726c783 100644
--- a/python/cudf/cudf/tests/test_groupby.py
+++ b/python/cudf/cudf/tests/test_groupby.py
@@ -1456,7 +1456,7 @@ def test_groupby_attribute_error():
     class TestGroupBy(cudf.core.groupby.GroupBy):
         @property
         def _groupby(self):
-            raise AttributeError("Test error message")
+            raise AttributeError(err_msg)
 
     a = cudf.DataFrame({"a": [1, 2], "b": [2, 3]})
     gb = TestGroupBy(a, a["a"])
diff --git a/python/cudf/cudf/tests/test_onehot.py b/python/cudf/cudf/tests/test_onehot.py
index 3c067975566..d42b0e85d28 100644
--- a/python/cudf/cudf/tests/test_onehot.py
+++ b/python/cudf/cudf/tests/test_onehot.py
@@ -10,6 +10,8 @@
 from cudf import DataFrame
 from cudf.testing import _utils as utils
 
+pytestmark = pytest.mark.spilling
+
 
 @pytest.mark.parametrize(
     "data, index",
diff --git a/python/cudf/cudf/tests/test_pickling.py b/python/cudf/cudf/tests/test_pickling.py
index 21343f19d79..8ce818e7a3d 100644
--- a/python/cudf/cudf/tests/test_pickling.py
+++ b/python/cudf/cudf/tests/test_pickling.py
@@ -1,6 +1,6 @@
 # Copyright (c) 2018-2022, NVIDIA CORPORATION.
 
-import sys
+import pickle
 
 import numpy as np
 import pandas as pd
@@ -10,13 +10,7 @@
 from cudf.core.buffer import as_buffer
 from cudf.testing._utils import assert_eq
 
-if sys.version_info < (3, 8):
-    try:
-        import pickle5 as pickle
-    except ImportError:
-        import pickle
-else:
-    import pickle
+pytestmark = pytest.mark.spilling
 
 
 def check_serialization(df):
diff --git a/python/cudf/cudf/tests/test_reshape.py b/python/cudf/cudf/tests/test_reshape.py
index 181bff8512a..280b619c305 100644
--- a/python/cudf/cudf/tests/test_reshape.py
+++ b/python/cudf/cudf/tests/test_reshape.py
@@ -16,6 +16,8 @@
     assert_eq,
 )
 
+pytestmark = pytest.mark.spilling
+
 
 @pytest.mark.parametrize("num_id_vars", [0, 1, 2])
 @pytest.mark.parametrize("num_value_vars", [0, 1, 2])
diff --git a/python/cudf/cudf/tests/test_spilling.py b/python/cudf/cudf/tests/test_spilling.py
new file mode 100644
index 00000000000..6f790600d92
--- /dev/null
+++ b/python/cudf/cudf/tests/test_spilling.py
@@ -0,0 +1,477 @@
+# Copyright (c) 2022, NVIDIA CORPORATION.
+
+import importlib
+import random
+import time
+import warnings
+from concurrent.futures import ThreadPoolExecutor
+from typing import Tuple
+
+import cupy
+import numpy as np
+import pandas
+import pandas.testing
+import pytest
+
+import rmm
+
+import cudf
+import cudf.core.buffer.spill_manager
+import cudf.options
+from cudf.core.abc import Serializable
+from cudf.core.buffer import (
+    Buffer,
+    acquire_spill_lock,
+    as_buffer,
+    get_spill_lock,
+)
+from cudf.core.buffer.spill_manager import (
+    SpillManager,
+    get_global_manager,
+    get_rmm_memory_resource_stack,
+    set_global_manager,
+)
+from cudf.core.buffer.spillable_buffer import (
+    SpillableBuffer,
+    SpillableBufferSlice,
+    SpillLock,
+)
+from cudf.testing._utils import assert_eq
+
+if get_global_manager() is not None:
+    pytest.skip(
+        "cannot test spilling when enabled globally, set `CUDF_SPILL=off`",
+        allow_module_level=True,
+    )
+
+
+def gen_df(target="gpu") -> cudf.DataFrame:
+    ret = cudf.DataFrame({"a": [1, 2, 3]})
+    if target != "gpu":
+        gen_df.buffer(ret).spill(target=target)
+    return ret
+
+
+gen_df.buffer = lambda df: df._data._data["a"].data
+gen_df.is_spilled = lambda df: gen_df.buffer(df).is_spilled
+gen_df.is_spillable = lambda df: gen_df.buffer(df).spillable
+gen_df.buffer_size = gen_df.buffer(gen_df()).size
+
+
+def spilled_and_unspilled(manager: SpillManager) -> Tuple[int, int]:
+    """Get bytes spilled and unspilled known by the manager"""
+    spilled = sum(buf.size for buf in manager.buffers() if buf.is_spilled)
+    unspilled = sum(
+        buf.size for buf in manager.buffers() if not buf.is_spilled
+    )
+    return spilled, unspilled
+
+
+@pytest.fixture
+def manager(request):
+    """Fixture to enable and make a spilling manager available"""
+    kwargs = dict(getattr(request, "param", {}))
+    with warnings.catch_warnings():
+        warnings.simplefilter("error")
+        set_global_manager(manager=SpillManager(**kwargs))
+        yield get_global_manager()
+        # Retrieving the test result using the `pytest_runtest_makereport`
+        # hook from conftest.py
+        if request.node.report["call"].failed:
+            # Ignore `overwriting non-empty manager` errors when
+            # test is failing.
+            warnings.simplefilter("ignore")
+        set_global_manager(manager=None)
+
+
+def test_spillable_buffer(manager: SpillManager):
+    buf = as_buffer(data=rmm.DeviceBuffer(size=10), exposed=False)
+    assert isinstance(buf, SpillableBuffer)
+    assert buf.spillable
+    buf.ptr  # Expose pointer
+    assert buf.exposed
+    assert not buf.spillable
+    buf = as_buffer(data=rmm.DeviceBuffer(size=10), exposed=False)
+    # Notice, accessing `__cuda_array_interface__` itself doesn't
+    # expose the pointer, only accessing the "data" field exposes
+    # the pointer.
+    iface = buf.__cuda_array_interface__
+    assert not buf.exposed
+    assert buf.spillable
+    iface["data"][0]  # Expose pointer
+    assert buf.exposed
+    assert not buf.spillable
+
+
+@pytest.mark.parametrize(
+    "attribute",
+    [
+        "ptr",
+        "get_ptr",
+        "memoryview",
+        "is_spilled",
+        "exposed",
+        "spillable",
+        "spill_lock",
+        "spill",
+    ],
+)
+def test_spillable_buffer_view_attributes(manager: SpillManager, attribute):
+    base = as_buffer(data=rmm.DeviceBuffer(size=10), exposed=False)
+    view = base[:]
+    attr_base = getattr(base, attribute)
+    attr_view = getattr(view, attribute)
+    if callable(attr_view):
+        pass
+    else:
+        assert attr_base == attr_view
+
+
+def test_from_pandas(manager: SpillManager):
+    pdf1 = pandas.DataFrame({"x": [1, 2, 3]})
+    df = cudf.from_pandas(pdf1)
+    assert df._data._data["x"].data.spillable
+    pdf2 = df.to_pandas()
+    pandas.testing.assert_frame_equal(pdf1, pdf2)
+
+
+def test_creations(manager: SpillManager):
+    df = cudf.datasets.timeseries()
+    assert isinstance(df._data._data["x"].data, SpillableBuffer)
+    assert df._data._data["x"].data.spillable
+    df = cudf.DataFrame({"x": [1, 2, 3]})
+    assert df._data._data["x"].data.spillable
+    df = cudf.datasets.randomdata(10)
+    assert df._data._data["x"].data.spillable
+
+
+def test_spillable_df_groupby(manager: SpillManager):
+    df = cudf.DataFrame({"x": [1, 1, 1]})
+    gb = df.groupby("x")
+    assert len(df._data._data["x"].base_data._spill_locks) == 0
+    gb._groupby
+    # `gb._groupby`, which is cached on `gb`, holds a spill lock
+    assert len(df._data._data["x"].base_data._spill_locks) == 1
+    assert not df._data._data["x"].data.spillable
+    del gb
+    assert df._data._data["x"].data.spillable
+
+
+def test_spilling_buffer(manager: SpillManager):
+    buf = as_buffer(rmm.DeviceBuffer(size=10), exposed=False)
+    buf.spill(target="cpu")
+    assert buf.is_spilled
+    buf.ptr  # Expose pointer and trigger unspill
+    assert not buf.is_spilled
+    with pytest.raises(ValueError, match="unspillable buffer"):
+        buf.spill(target="cpu")
+
+
+def test_environment_variables(monkeypatch):
+    def reload_options():
+        # To enable monkey patching of the environment variables,
+        # mark the global manager as uninitialized.
+        set_global_manager(None)
+        cudf.core.buffer.spill_manager._global_manager_uninitialized = True
+        importlib.reload(cudf.options)
+
+    monkeypatch.setenv("CUDF_SPILL_ON_DEMAND", "off")
+    monkeypatch.setenv("CUDF_SPILL", "off")
+    reload_options()
+    assert get_global_manager() is None
+
+    monkeypatch.setenv("CUDF_SPILL", "on")
+    reload_options()
+    manager = get_global_manager()
+    assert isinstance(manager, SpillManager)
+    assert manager._spill_on_demand is False
+    assert manager._device_memory_limit is None
+
+    monkeypatch.setenv("CUDF_SPILL_DEVICE_LIMIT", "1000")
+    reload_options()
+    manager = get_global_manager()
+    assert isinstance(manager, SpillManager)
+    assert manager._device_memory_limit == 1000
+
+
+def test_spill_device_memory(manager: SpillManager):
+    df = gen_df()
+    assert spilled_and_unspilled(manager) == (0, gen_df.buffer_size)
+    manager.spill_device_memory(nbytes=1)
+    assert spilled_and_unspilled(manager) == (gen_df.buffer_size, 0)
+    del df
+    assert spilled_and_unspilled(manager) == (0, 0)
+    df1 = gen_df()
+    df2 = gen_df()
+    manager.spill_device_memory(nbytes=1)
+    assert gen_df.is_spilled(df1)
+    assert not gen_df.is_spilled(df2)
+    manager.spill_device_memory(nbytes=1)
+    assert gen_df.is_spilled(df1)
+    assert gen_df.is_spilled(df2)
+    df3 = df1 + df2
+    assert not gen_df.is_spilled(df1)
+    assert not gen_df.is_spilled(df2)
+    assert not gen_df.is_spilled(df3)
+    manager.spill_device_memory(nbytes=1)
+    assert gen_df.is_spilled(df1)
+    assert not gen_df.is_spilled(df2)
+    assert not gen_df.is_spilled(df3)
+    df2.abs()  # Should change the access time
+    manager.spill_device_memory(nbytes=1)
+    assert gen_df.is_spilled(df1)
+    assert not gen_df.is_spilled(df2)
+    assert gen_df.is_spilled(df3)
+
+
+def test_spill_to_device_limit(manager: SpillManager):
+    df1 = gen_df()
+    df2 = gen_df()
+    assert spilled_and_unspilled(manager) == (0, gen_df.buffer_size * 2)
+    manager.spill_to_device_limit(device_limit=0)
+    assert spilled_and_unspilled(manager) == (gen_df.buffer_size * 2, 0)
+    df3 = df1 + df2
+    manager.spill_to_device_limit(device_limit=0)
+    assert spilled_and_unspilled(manager) == (gen_df.buffer_size * 3, 0)
+    assert gen_df.is_spilled(df1)
+    assert gen_df.is_spilled(df2)
+    assert gen_df.is_spilled(df3)
+
+
+@pytest.mark.parametrize(
+    "manager", [{"device_memory_limit": 0}], indirect=True
+)
+def test_zero_device_limit(manager: SpillManager):
+    assert manager._device_memory_limit == 0
+    df1 = gen_df()
+    df2 = gen_df()
+    assert spilled_and_unspilled(manager) == (gen_df.buffer_size * 2, 0)
+    df1 + df2
+    # Notice, while performing the addition both df1 and df2 are unspillable
+    assert spilled_and_unspilled(manager) == (0, gen_df.buffer_size * 2)
+    manager.spill_to_device_limit()
+    assert spilled_and_unspilled(manager) == (gen_df.buffer_size * 2, 0)
+
+
+def test_external_memory_never_spills(manager):
+    """
+    Test that external data, i.e., data not managed by RMM,
+    is never spilled
+    """
+
+    cupy.cuda.set_allocator()  # uses default allocator
+
+    a = cupy.asarray([1, 2, 3])
+    s = cudf.Series(a)
+    assert len(manager.buffers()) == 0
+    assert not s._data[None].data.spillable
+
+
+def test_spilling_df_views(manager):
+    df = gen_df(target="cpu")
+    assert gen_df.is_spilled(df)
+    df_view = df.loc[1:]
+    assert gen_df.is_spillable(df_view)
+    assert gen_df.is_spillable(df)
+
+
+def test_modify_spilled_views(manager):
+    df = gen_df()
+    df_view = df.iloc[1:]
+    buf = gen_df.buffer(df)
+    buf.spill(target="cpu")
+
+    # modify the spilled df and check that the changes are reflected
+    # in the view
+    df.iloc[1:] = 0
+    assert_eq(df_view, df.iloc[1:])
+
+    # now, modify the view and check that the changes are reflected in
+    # the df
+    df_view.iloc[:] = -1
+    assert_eq(df_view, df.iloc[1:])
+
+
+def test_ptr_restricted(manager: SpillManager):
+    buf = as_buffer(data=rmm.DeviceBuffer(size=10), exposed=False)
+    assert buf.spillable
+    assert len(buf._spill_locks) == 0
+    slock1 = SpillLock()
+    buf.get_ptr(spill_lock=slock1)
+    assert not buf.spillable
+    assert len(buf._spill_locks) == 1
+    slock2 = SpillLock()
+    buf.spill_lock(spill_lock=slock2)
+    buf.get_ptr(spill_lock=slock2)
+    assert not buf.spillable
+    assert len(buf._spill_locks) == 2
+    del slock1
+    assert len(buf._spill_locks) == 1
+    del slock2
+    assert len(buf._spill_locks) == 0
+    assert buf.spillable
+
+
+def test_get_spill_lock(manager: SpillManager):
+    @acquire_spill_lock()
+    def f(sleep=False, nest=0):
+        if sleep:
+            time.sleep(random.random() / 100)
+        if nest:
+            return f(nest=nest - 1)
+        return get_spill_lock()
+
+    assert get_spill_lock() is None
+    slock = f()
+    assert isinstance(slock, SpillLock)
+    assert get_spill_lock() is None
+    slock = f(nest=2)
+    assert isinstance(slock, SpillLock)
+    assert get_spill_lock() is None
+
+    with ThreadPoolExecutor(max_workers=2) as executor:
+        futures_with_spill_lock = []
+        futures_without_spill_lock = []
+        for _ in range(100):
+            futures_with_spill_lock.append(
+                executor.submit(f, sleep=True, nest=1)
+            )
+            futures_without_spill_lock.append(
+                executor.submit(f, sleep=True, nest=1)
+            )
+        all(isinstance(f.result(), SpillLock) for f in futures_with_spill_lock)
+        all(f is None for f in futures_without_spill_lock)
+
+
+def test_get_spill_lock_no_manager():
+    """When spilling is disabled, get_spill_lock() should return None always"""
+
+    @acquire_spill_lock()
+    def f():
+        return get_spill_lock()
+
+    assert get_spill_lock() is None
+    assert f() is None
+
+
+@pytest.mark.parametrize("target", ["gpu", "cpu"])
+@pytest.mark.parametrize("view", [None, slice(0, 2), slice(1, 3)])
+def test_serialize_device(manager, target, view):
+    df1 = gen_df()
+    if view is not None:
+        df1 = df1.iloc[view]
+    gen_df.buffer(df1).spill(target=target)
+
+    header, frames = df1.device_serialize()
+    assert len(frames) == 1
+    if target == "gpu":
+        assert isinstance(frames[0], Buffer)
+        assert not gen_df.is_spilled(df1)
+        assert not gen_df.is_spillable(df1)
+        frames[0] = cupy.array(frames[0], copy=True)
+    else:
+        assert isinstance(frames[0], memoryview)
+        assert gen_df.is_spilled(df1)
+        assert gen_df.is_spillable(df1)
+
+    df2 = Serializable.device_deserialize(header, frames)
+    assert_eq(df1, df2)
+
+
+@pytest.mark.parametrize("target", ["gpu", "cpu"])
+@pytest.mark.parametrize("view", [None, slice(0, 2), slice(1, 3)])
+def test_serialize_host(manager, target, view):
+    df1 = gen_df()
+    if view is not None:
+        df1 = df1.iloc[view]
+    gen_df.buffer(df1).spill(target=target)
+
+    # Unspilled df becomes spilled after host serialization
+    header, frames = df1.host_serialize()
+    assert all(isinstance(f, memoryview) for f in frames)
+    df2 = Serializable.host_deserialize(header, frames)
+    assert gen_df.is_spilled(df2)
+    assert_eq(df1, df2)
+
+
+def test_serialize_dask_dataframe(manager: SpillManager):
+    protocol = pytest.importorskip("distributed.protocol")
+
+    df1 = gen_df(target="gpu")
+    header, frames = protocol.serialize(
+        df1, serializers=("dask",), on_error="raise"
+    )
+    buf: SpillableBuffer = gen_df.buffer(df1)
+    assert len(frames) == 1
+    assert isinstance(frames[0], memoryview)
+    # Check that the memoryview and frames is the same memory
+    assert (
+        np.array(buf.memoryview()).__array_interface__["data"]
+        == np.array(frames[0]).__array_interface__["data"]
+    )
+
+    df2 = protocol.deserialize(header, frames)
+    assert gen_df.is_spilled(df2)
+    assert_eq(df1, df2)
+
+
+def test_serialize_cuda_dataframe(manager: SpillManager):
+    protocol = pytest.importorskip("distributed.protocol")
+
+    df1 = gen_df(target="gpu")
+    header, frames = protocol.serialize(
+        df1, serializers=("cuda",), on_error="raise"
+    )
+    buf: SpillableBufferSlice = gen_df.buffer(df1)
+    assert len(buf._base._spill_locks) == 1
+    assert len(frames) == 1
+    assert isinstance(frames[0], Buffer)
+    assert frames[0].ptr == buf.ptr
+
+    frames[0] = cupy.array(frames[0], copy=True)
+    df2 = protocol.deserialize(header, frames)
+    assert_eq(df1, df2)
+
+
+def test_get_rmm_memory_resource_stack():
+    mr1 = rmm.mr.get_current_device_resource()
+    assert all(
+        not isinstance(m, rmm.mr.FailureCallbackResourceAdaptor)
+        for m in get_rmm_memory_resource_stack(mr1)
+    )
+
+    mr2 = rmm.mr.FailureCallbackResourceAdaptor(mr1, lambda x: False)
+    assert get_rmm_memory_resource_stack(mr2)[0] is mr2
+    assert get_rmm_memory_resource_stack(mr2)[1] is mr1
+
+    mr3 = rmm.mr.FixedSizeMemoryResource(mr2)
+    assert get_rmm_memory_resource_stack(mr3)[0] is mr3
+    assert get_rmm_memory_resource_stack(mr3)[1] is mr2
+    assert get_rmm_memory_resource_stack(mr3)[2] is mr1
+
+    mr4 = rmm.mr.FailureCallbackResourceAdaptor(mr3, lambda x: False)
+    assert get_rmm_memory_resource_stack(mr4)[0] is mr4
+    assert get_rmm_memory_resource_stack(mr4)[1] is mr3
+    assert get_rmm_memory_resource_stack(mr4)[2] is mr2
+    assert get_rmm_memory_resource_stack(mr4)[3] is mr1
+
+
+def test_df_transpose(manager: SpillManager):
+    df1 = cudf.DataFrame({"x": [1, 2]})
+    df2 = df1.transpose()
+    # For now, all buffers are marked as exposed
+    assert df1._data._data["x"].data.exposed
+    assert df2._data._data[0].data.exposed
+    assert df2._data._data[1].data.exposed
+
+
+@pytest.mark.parametrize("dtype", ["uint8", "uint64"])
+def test_memoryview_slice(manager: SpillManager, dtype):
+    """Check .memoryview() of a sliced spillable buffer"""
+
+    data = np.arange(10, dtype=dtype)
+    # memoryview of a sliced spillable buffer
+    m1 = as_buffer(data=data)[1:-1].memoryview()
+    # sliced memoryview of data as bytes
+    m2 = memoryview(data).cast("B")[1:-1]
+    assert m1 == m2
diff --git a/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx b/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx
index 4fc9e473fa3..bf459f22c16 100644
--- a/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx
+++ b/python/strings_udf/strings_udf/_lib/cudf_jit_udf.pyx
@@ -24,7 +24,7 @@ def column_to_string_view_array(Column strings_col):
         c_buffer = move(cpp_to_string_view_array(input_view))
 
     device_buffer = DeviceBuffer.c_from_unique_ptr(move(c_buffer))
-    return as_buffer(device_buffer)
+    return as_buffer(device_buffer, exposed=True)
 
 
 def column_from_udf_string_array(DeviceBuffer d_buffer):

From 73d73a757bf03135cf259350021bbd111df9dd79 Mon Sep 17 00:00:00 2001
From: Nghia Truong <nghiatruong.vn@gmail.com>
Date: Wed, 16 Nov 2022 13:51:50 -0800
Subject: [PATCH 180/202] Refactor `purge_nonempty_nulls` (#12111)

This refactor combines the discrete interfaces of `purge_nonempty_nulls` that require `structs/strings/lists_column_view` input into just one interface accepting just `column_view`. This facilitates easier usage of this function. It is also a necessary step for subsequent work in fixing `structs::superimpose_parent_nulls`.

`cudf::detail` interface for this new API is also added.

Authors:
  - Nghia Truong (https://github.com/ttnghia)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Bradley Dice (https://github.com/bdice)
  - AJ Schmidt (https://github.com/ajschmidt8)
  - Ray Douglass (https://github.com/raydouglass)

URL: https://github.com/rapidsai/cudf/pull/12111
---
 conda/recipes/libcudf/meta.yaml               |   1 -
 cpp/include/cudf/copying.hpp                  |  65 +++--------
 cpp/include/cudf/detail/copy.cuh              |  50 ---------
 cpp/include/cudf/detail/copy.hpp              |  10 ++
 cpp/src/copying/purge_nonempty_nulls.cu       |  42 +++----
 cpp/src/lists/set_operations.cu               |  12 +-
 .../copying/purge_nonempty_nulls_tests.cpp    | 105 +++++++++---------
 cpp/tests/io/parquet_test.cpp                 |   6 +-
 8 files changed, 104 insertions(+), 187 deletions(-)
 delete mode 100644 cpp/include/cudf/detail/copy.cuh

diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml
index 15d2fcc2a36..ceafc44ed10 100644
--- a/conda/recipes/libcudf/meta.yaml
+++ b/conda/recipes/libcudf/meta.yaml
@@ -78,7 +78,6 @@ outputs:
         - test -f $PREFIX/include/cudf/detail/binaryop.hpp
         - test -f $PREFIX/include/cudf/detail/calendrical_month_sequence.cuh
         - test -f $PREFIX/include/cudf/detail/concatenate.hpp
-        - test -f $PREFIX/include/cudf/detail/copy.cuh
         - test -f $PREFIX/include/cudf/detail/copy.hpp
         - test -f $PREFIX/include/cudf/detail/datetime.hpp
         - test -f $PREFIX/include/cudf/detail/fill.hpp
diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp
index 79dcaaaf00b..63c66335d2d 100644
--- a/cpp/include/cudf/copying.hpp
+++ b/cpp/include/cudf/copying.hpp
@@ -1012,12 +1012,19 @@ bool has_nonempty_nulls(column_view const& input);
 bool may_have_nonempty_nulls(column_view const& input);
 
 /**
- * @brief Copies `input`, purging any non-empty null rows in the column or its descendants
+ * @brief Copy `input` into output while purging any non-empty null rows in the column or its
+ * descendants.
  *
- * LIST columns may have non-empty null rows.
- * For example:
- * @code{.pseudo}
+ * If the input column is not of compound type (LIST/STRING/STRUCT/DICTIONARY), the output will be
+ * the same as input.
+ *
+ * The purge operation only applies directly to LIST and STRING columns, but it applies indirectly
+ * to STRUCT/DICTIONARY columns as well, since these columns may have child columns that
+ * are LIST or STRING.
  *
+ * Examples:
+ *
+ * @code{.pseudo}
  * auto const lists   = lists_column_wrapper<int32_t>{ {0,1}, {2,3}, {4,5} }.release();
  * cudf::detail::set_null_mask(lists->null_mask(), 1, 2, false);
  *
@@ -1027,33 +1034,13 @@ bool may_have_nonempty_nulls(column_view const& input);
  *   Offsets:  [0, 2, 4, 6]
  *   Child:    [0, 1, 2, 3, 4, 5]
  *
- * After purging the contents of the list's null rows, the column's contents
- * will be:
+ * After purging the contents of the list's null rows, the column's contents will be:
  *   Validity: 101
  *   Offsets:  [0, 2, 2, 4]
  *   Child:    [0, 1, 4, 5]
  * @endcode
  *
- * The purge operation only applies directly to LIST and STRING columns, but it
- * applies indirectly to STRUCT columns as well, since LIST and STRUCT columns
- * may have child/descendant columns that are LIST or STRING.
- *
- * @param input The column whose null rows are to be checked and purged
- * @param mr Device memory resource used to allocate the returned column's device memory
- * @return std::unique_ptr<column> Column with equivalent contents to `input`, but with
- * the contents of null rows purged
- */
-std::unique_ptr<column> purge_nonempty_nulls(
-  lists_column_view const& input,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
-
-/**
- * @brief Copies `input`, purging any non-empty null rows in the column or its descendants
- *
- * STRING columns may have non-empty null rows.
- * For example:
  * @code{.pseudo}
- *
  * auto const strings = strings_column_wrapper{ "AB", "CD", "EF" }.release();
  * cudf::detail::set_null_mask(strings->null_mask(), 1, 2, false);
  *
@@ -1070,26 +1057,7 @@ std::unique_ptr<column> purge_nonempty_nulls(
  *   Child:    [A, B, E, F]
  * @endcode
  *
- * The purge operation only applies directly to LIST and STRING columns, but it
- * applies indirectly to STRUCT columns as well, since LIST and STRUCT columns
- * may have child/descendant columns that are LIST or STRING.
- *
- * @param input The column whose null rows are to be checked and purged
- * @param mr Device memory resource used to allocate the returned column's device memory
- * @return std::unique_ptr<column> Column with equivalent contents to `input`, but with
- * the contents of null rows purged
- */
-std::unique_ptr<column> purge_nonempty_nulls(
-  strings_column_view const& input,
-  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
-
-/**
- * @brief Copies `input`, purging any non-empty null rows in the column or its descendants
- *
- * STRUCTS columns may have null rows, with non-empty child rows.
- * For example:
  * @code{.pseudo}
- *
  * auto const lists   = lists_column_wrapper<int32_t>{ {0,1}, {2,3}, {4,5} };
  * auto const structs = structs_column_wrapper{ {lists}, null_at(1) };
  *
@@ -1106,17 +1074,12 @@ std::unique_ptr<column> purge_nonempty_nulls(
  *   Child:    [0, 1, 4, 5]
  * @endcode
  *
- * The purge operation only applies directly to LIST and STRING columns, but it
- * applies indirectly to STRUCT columns as well, since LIST and STRUCT columns
- * may have child/descendant columns that are LIST or STRING.
- *
  * @param input The column whose null rows are to be checked and purged
  * @param mr Device memory resource used to allocate the returned column's device memory
- * @return std::unique_ptr<column> Column with equivalent contents to `input`, but with
- * the contents of null rows purged
+ * @return A new column with equivalent contents to `input`, but with null rows purged
  */
 std::unique_ptr<column> purge_nonempty_nulls(
-  structs_column_view const& input,
+  column_view const& input,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
 /** @} */
diff --git a/cpp/include/cudf/detail/copy.cuh b/cpp/include/cudf/detail/copy.cuh
deleted file mode 100644
index 348f629a51a..00000000000
--- a/cpp/include/cudf/detail/copy.cuh
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2022, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#pragma once
-
-#include <cudf/detail/copy.hpp>
-#include <cudf/detail/gather.cuh>
-
-#include <thrust/iterator/counting_iterator.h>
-
-namespace cudf::detail {
-
-/**
- * @copydoc cudf::purge_nonempty_nulls(structs_column_view const&, rmm::mr::device_memory_resource*)
- *
- * @tparam ColumnViewT View type (lists_column_view, strings_column_view, or strings_column_view)
- * @param stream CUDA stream used for device memory operations and kernel launches
- */
-template <typename ColumnViewT>
-std::unique_ptr<cudf::column> purge_nonempty_nulls(ColumnViewT const& input,
-                                                   rmm::cuda_stream_view stream,
-                                                   rmm::mr::device_memory_resource* mr)
-{
-  // Implement via identity gather.
-  auto const input_column = input.parent();
-  auto const gather_begin = thrust::counting_iterator<cudf::size_type>(0);
-  auto const gather_end   = gather_begin + input_column.size();
-
-  auto gathered_table = cudf::detail::gather(table_view{{input_column}},
-                                             gather_begin,
-                                             gather_end,
-                                             out_of_bounds_policy::DONT_CHECK,
-                                             stream,
-                                             mr);
-  return std::move(gathered_table->release()[0]);
-}
-
-}  // namespace cudf::detail
diff --git a/cpp/include/cudf/detail/copy.hpp b/cpp/include/cudf/detail/copy.hpp
index 22453315762..8c3f315284d 100644
--- a/cpp/include/cudf/detail/copy.hpp
+++ b/cpp/include/cudf/detail/copy.hpp
@@ -315,5 +315,15 @@ bool has_nonempty_nulls(column_view const& input, rmm::cuda_stream_view stream);
  */
 bool may_have_nonempty_nulls(column_view const& input, rmm::cuda_stream_view stream);
 
+/**
+ * @copydoc cudf::purge_nonempty_nulls
+ *
+ * @param stream CUDA stream used for device memory operations and kernel launches.
+ */
+std::unique_ptr<column> purge_nonempty_nulls(
+  column_view const& input,
+  rmm::cuda_stream_view stream,
+  rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
 }  // namespace detail
 }  // namespace cudf
diff --git a/cpp/src/copying/purge_nonempty_nulls.cu b/cpp/src/copying/purge_nonempty_nulls.cu
index ab3cfefd518..5bdf10c8af6 100644
--- a/cpp/src/copying/purge_nonempty_nulls.cu
+++ b/cpp/src/copying/purge_nonempty_nulls.cu
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 #include <cudf/copying.hpp>
-#include <cudf/detail/copy.cuh>
+#include <cudf/detail/gather.cuh>
 #include <cudf/utilities/default_stream.hpp>
 
 #include <thrust/count.h>
@@ -80,6 +80,24 @@ bool has_nonempty_nulls(cudf::column_view const& input, rmm::cuda_stream_view st
 
   return false;
 }
+
+std::unique_ptr<column> purge_nonempty_nulls(column_view const& input,
+                                             rmm::cuda_stream_view stream,
+                                             rmm::mr::device_memory_resource* mr)
+{
+  // If not compound types (LIST/STRING/STRUCT/DICTIONARY) then just copy the input into output.
+  if (!cudf::is_compound(input.type())) { return std::make_unique<column>(input, stream, mr); }
+
+  // Implement via identity gather.
+  auto gathered_table = cudf::detail::gather(table_view{{input}},
+                                             thrust::make_counting_iterator(0),
+                                             thrust::make_counting_iterator(input.size()),
+                                             out_of_bounds_policy::DONT_CHECK,
+                                             stream,
+                                             mr);
+  return std::move(gathered_table->release().front());
+}
+
 }  // namespace detail
 
 /**
@@ -110,27 +128,9 @@ bool has_nonempty_nulls(column_view const& input)
 }
 
 /**
- * @copydoc cudf::purge_nonempty_nulls(lists_column_view const&, rmm::mr::device_memory_resource*)
- */
-std::unique_ptr<cudf::column> purge_nonempty_nulls(lists_column_view const& input,
-                                                   rmm::mr::device_memory_resource* mr)
-{
-  return detail::purge_nonempty_nulls(input, cudf::get_default_stream(), mr);
-}
-
-/**
- * @copydoc cudf::purge_nonempty_nulls(structs_column_view const&, rmm::mr::device_memory_resource*)
- */
-std::unique_ptr<cudf::column> purge_nonempty_nulls(structs_column_view const& input,
-                                                   rmm::mr::device_memory_resource* mr)
-{
-  return detail::purge_nonempty_nulls(input, cudf::get_default_stream(), mr);
-}
-
-/**
- * @copydoc cudf::purge_nonempty_nulls(strings_column_view const&, rmm::mr::device_memory_resource*)
+ * @copydoc cudf::purge_nonempty_nulls(column_view const&, rmm::mr::device_memory_resource*)
  */
-std::unique_ptr<cudf::column> purge_nonempty_nulls(strings_column_view const& input,
+std::unique_ptr<cudf::column> purge_nonempty_nulls(column_view const& input,
                                                    rmm::mr::device_memory_resource* mr)
 {
   return detail::purge_nonempty_nulls(input, cudf::get_default_stream(), mr);
diff --git a/cpp/src/lists/set_operations.cu b/cpp/src/lists/set_operations.cu
index cc52478900a..a31b7c6e5be 100644
--- a/cpp/src/lists/set_operations.cu
+++ b/cpp/src/lists/set_operations.cu
@@ -17,7 +17,7 @@
 #include "utilities.hpp"
 
 #include <cudf/column/column_factories.hpp>
-#include <cudf/detail/copy.cuh>
+#include <cudf/detail/copy.hpp>
 #include <cudf/detail/copy_if.cuh>
 #include <cudf/detail/null_mask.hpp>
 #include <cudf/detail/nvtx/ranges.hpp>
@@ -176,9 +176,8 @@ std::unique_ptr<column> intersect_distinct(lists_column_view const& lhs,
                                   stream,
                                   mr);
 
-  return null_count == 0
-           ? std::move(output)
-           : cudf::detail::purge_nonempty_nulls(lists_column_view{output->view()}, stream, mr);
+  return null_count == 0 ? std::move(output)
+                         : cudf::detail::purge_nonempty_nulls(output->view(), stream, mr);
 }
 
 std::unique_ptr<column> union_distinct(lists_column_view const& lhs,
@@ -253,9 +252,8 @@ std::unique_ptr<column> difference_distinct(lists_column_view const& lhs,
                                   stream,
                                   mr);
 
-  return null_count == 0
-           ? std::move(output)
-           : cudf::detail::purge_nonempty_nulls(lists_column_view{output->view()}, stream, mr);
+  return null_count == 0 ? std::move(output)
+                         : cudf::detail::purge_nonempty_nulls(output->view(), stream, mr);
 }
 
 }  // namespace detail
diff --git a/cpp/tests/copying/purge_nonempty_nulls_tests.cpp b/cpp/tests/copying/purge_nonempty_nulls_tests.cpp
index b917386648a..6dfd038b05c 100644
--- a/cpp/tests/copying/purge_nonempty_nulls_tests.cpp
+++ b/cpp/tests/copying/purge_nonempty_nulls_tests.cpp
@@ -29,35 +29,33 @@
 #include <cudf_test/cudf_gtest.hpp>
 #include <cudf_test/iterator_utilities.hpp>
 
-namespace cudf::test {
-
-using iterators::no_nulls;
-using iterators::null_at;
-using iterators::nulls_at;
+using cudf::test::iterators::no_nulls;
+using cudf::test::iterators::null_at;
+using cudf::test::iterators::nulls_at;
 using T             = int32_t;  // The actual type of the leaf node isn't really important.
-using values_col_t  = fixed_width_column_wrapper<T>;
-using offsets_col_t = fixed_width_column_wrapper<size_type>;
-using gather_map_t  = fixed_width_column_wrapper<size_type>;
+using values_col_t  = cudf::test::fixed_width_column_wrapper<T>;
+using offsets_col_t = cudf::test::fixed_width_column_wrapper<cudf::size_type>;
+using gather_map_t  = cudf::test::fixed_width_column_wrapper<cudf::size_type>;
 
 template <typename T>
 using LCW = cudf::test::lists_column_wrapper<T, int32_t>;
 
 struct PurgeNonEmptyNullsTest : public cudf::test::BaseFixture {
   /// Helper to run gather() on a single column, and extract the single column from the result.
-  std::unique_ptr<cudf::column> gather(column_view const& input, gather_map_t const& gather_map)
+  std::unique_ptr<cudf::column> gather(cudf::column_view const& input,
+                                       gather_map_t const& gather_map)
   {
     auto gathered =
-      cudf::gather(cudf::table_view{{input}}, gather_map, out_of_bounds_policy::NULLIFY);
+      cudf::gather(cudf::table_view{{input}}, gather_map, cudf::out_of_bounds_policy::NULLIFY);
     return std::move(gathered->release()[0]);
   }
 
   /// Verify that the result of `sanitize()` is equivalent to the unsanitized input,
   /// except that the null rows are also empty.
-  template <typename ColumnViewT>
-  void test_purge(ColumnViewT const& unpurged)
+  void test_purge(cudf::column_view const& unpurged)
   {
     auto const purged = cudf::purge_nonempty_nulls(unpurged);
-    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(unpurged.parent(), *purged);
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(unpurged, *purged);
     EXPECT_FALSE(cudf::has_nonempty_nulls(*purged));
   }
 };
@@ -80,12 +78,12 @@ TEST_F(PurgeNonEmptyNullsTest, SingleLevelList)
   EXPECT_TRUE(cudf::may_have_nonempty_nulls(*input));
   EXPECT_TRUE(cudf::has_nonempty_nulls(*input));
 
-  test_purge(lists_column_view{*input});
+  test_purge(*input);
 
   {
     // Selecting all rows from input, in different order.
     auto const results           = gather(input->view(), {1, 2, 0, 3});
-    auto const results_list_view = lists_column_view(*results);
+    auto const results_list_view = cudf::lists_column_view(*results);
 
     auto const expected = LCW<T>{{{5},
                                   {},  // NULL.
@@ -129,7 +127,7 @@ TEST_F(PurgeNonEmptyNullsTest, SingleLevelList)
   {
     // Test when gather selects unsanitized row specifically.
     auto const results            = gather(input->view(), {2});
-    auto const results_lists_view = lists_column_view(*results);
+    auto const results_lists_view = cudf::lists_column_view(*results);
     auto const expected           = LCW<T>{{
                                    LCW<T>{}  // NULL.
                                  },
@@ -163,12 +161,12 @@ TEST_F(PurgeNonEmptyNullsTest, TwoLevelList)
   EXPECT_TRUE(cudf::may_have_nonempty_nulls(*input));
   EXPECT_TRUE(cudf::has_nonempty_nulls(*input));
 
-  test_purge(lists_column_view{*input});
+  test_purge(*input);
 
   {
     // Verify that gather() output is sanitized.
     auto const results            = gather(input->view(), {100, 3, 0, 1});
-    auto const results_lists_view = lists_column_view(*results);
+    auto const results_lists_view = cudf::lists_column_view(*results);
 
     auto const expected = LCW<T>{{
                                    LCW<T>{},  // NULL, because of out of bounds.
@@ -185,7 +183,7 @@ TEST_F(PurgeNonEmptyNullsTest, TwoLevelList)
       LCW<T>{
         {1, 2, 3}, {4, 5, 6, 7}, {8}, {9, 1}, {2}, {11, 12}, {13, 14, 15}, {16, 17, 18}, {19}});
 
-    auto const child_lists_view = lists_column_view(results_lists_view.child());
+    auto const child_lists_view = cudf::lists_column_view(results_lists_view.child());
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(child_lists_view.offsets(),
                                    offsets_col_t{0, 3, 7, 8, 10, 11, 13, 16, 19, 20});
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(
@@ -218,11 +216,11 @@ TEST_F(PurgeNonEmptyNullsTest, ThreeLevelList)
   EXPECT_TRUE(cudf::may_have_nonempty_nulls(*input));
   EXPECT_TRUE(cudf::has_nonempty_nulls(*input));
 
-  test_purge(lists_column_view{*input});
+  test_purge(*input);
 
   {
     auto const results            = gather(input->view(), {100, 3, 0, 1});
-    auto const results_lists_view = lists_column_view(*results);
+    auto const results_lists_view = cudf::lists_column_view(*results);
 
     auto const expected = LCW<T>{
       {
@@ -253,7 +251,7 @@ TEST_F(PurgeNonEmptyNullsTest, ThreeLevelList)
 // List<string>.
 TEST_F(PurgeNonEmptyNullsTest, ListOfStrings)
 {
-  using T = string_view;
+  using T = cudf::string_view;
 
   auto const input = LCW<T>{{{{"1", "22", "", "4444"}, null_at(2)},
                              {"55555"},
@@ -272,12 +270,12 @@ TEST_F(PurgeNonEmptyNullsTest, ListOfStrings)
   EXPECT_TRUE(cudf::may_have_nonempty_nulls(*input));
   EXPECT_TRUE(cudf::has_nonempty_nulls(*input));
 
-  test_purge(lists_column_view{*input});
+  test_purge(*input);
 
   {
     // Selecting all rows from input, in different order.
     auto const results           = gather(input->view(), {1, 2, 0, 3});
-    auto const results_list_view = lists_column_view(*results);
+    auto const results_list_view = cudf::lists_column_view(*results);
 
     auto const expected = LCW<T>{{{"55555"},
                                   {},  // NULL.
@@ -289,7 +287,7 @@ TEST_F(PurgeNonEmptyNullsTest, ListOfStrings)
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(results_list_view.offsets(), offsets_col_t{0, 1, 1, 5, 8});
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(
       results_list_view.child(),
-      strings_column_wrapper{
+      cudf::test::strings_column_wrapper{
         {"55555", "1", "22", "", "4444", "88888888", "999999999", "1010101010"}, null_at(3)});
     EXPECT_TRUE(cudf::may_have_nonempty_nulls(*results));
     EXPECT_FALSE(cudf::has_nonempty_nulls(*results));
@@ -301,7 +299,7 @@ TEST_F(PurgeNonEmptyNullsTest, ListOfStrings)
     EXPECT_TRUE(cudf::has_nonempty_nulls(sliced));
 
     auto const results           = gather(sliced, {1, 2, 0, 3});
-    auto const results_list_view = lists_column_view(*results);
+    auto const results_list_view = cudf::lists_column_view(*results);
     auto const expected          = LCW<T>{{
                                    {},
                                    {"88888888", "999999999", "1010101010"},
@@ -313,7 +311,7 @@ TEST_F(PurgeNonEmptyNullsTest, ListOfStrings)
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(results_list_view.offsets(), offsets_col_t{0, 0, 3, 4, 8});
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(
       results_list_view.child(),
-      strings_column_wrapper{
+      cudf::test::strings_column_wrapper{
         "88888888", "999999999", "1010101010", "55555", "11", "22", "33", "44"});
     EXPECT_TRUE(cudf::may_have_nonempty_nulls(*results));
     EXPECT_FALSE(cudf::has_nonempty_nulls(*results));
@@ -324,7 +322,7 @@ TEST_F(PurgeNonEmptyNullsTest, ListOfStrings)
 TEST_F(PurgeNonEmptyNullsTest, UnsanitizedListOfUnsanitizedStrings)
 {
   auto strings =
-    strings_column_wrapper{
+    cudf::test::strings_column_wrapper{
       {"1", "22", "3", "44", "5", "66", "7", "8888", "9", "1010"},  //<--- "8888" will be
                                                                     // unsanitized.
       no_nulls()}
@@ -333,23 +331,24 @@ TEST_F(PurgeNonEmptyNullsTest, UnsanitizedListOfUnsanitizedStrings)
   EXPECT_FALSE(cudf::has_nonempty_nulls(*strings));
 
   // Set strings nullmask, post construction.
-  set_null_mask(strings->mutable_view().null_mask(), 7, 8, false);
+  cudf::set_null_mask(strings->mutable_view().null_mask(), 7, 8, false);
   EXPECT_TRUE(cudf::may_have_nonempty_nulls(*strings));
   EXPECT_TRUE(cudf::has_nonempty_nulls(*strings));
 
-  test_purge(strings_column_view{*strings});
+  test_purge(*strings);
 
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
-    strings_column_view(*strings).offsets(), offsets_col_t{0, 1, 3, 4, 6, 7, 9, 10, 14, 15, 19}
-    // 10-14 indicates that "8888" is unsanitized.
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(cudf::strings_column_view(*strings).offsets(),
+                                 offsets_col_t{0, 1, 3, 4, 6, 7, 9, 10, 14, 15, 19}
+                                 // 10-14 indicates that "8888" is unsanitized.
   );
 
   // Construct a list column from the strings column.
-  auto const lists = make_lists_column(4,
-                                       offsets_col_t{0, 4, 5, 7, 10}.release(),
-                                       std::move(strings),
-                                       0,
-                                       detail::make_null_mask(no_nulls(), no_nulls() + 4));
+  auto const lists =
+    cudf::make_lists_column(4,
+                            offsets_col_t{0, 4, 5, 7, 10}.release(),
+                            std::move(strings),
+                            0,
+                            cudf::test::detail::make_null_mask(no_nulls(), no_nulls() + 4));
   EXPECT_TRUE(cudf::may_have_nonempty_nulls(*lists));
   EXPECT_TRUE(cudf::has_nonempty_nulls(*lists));
 
@@ -359,32 +358,32 @@ TEST_F(PurgeNonEmptyNullsTest, UnsanitizedListOfUnsanitizedStrings)
   EXPECT_TRUE(cudf::may_have_nonempty_nulls(*lists));
   EXPECT_TRUE(cudf::has_nonempty_nulls(*lists));
 
-  test_purge(lists_column_view{*lists});
+  test_purge(*lists);
 
   // At this point,
   // 1. {"66", "7"} will be unsanitized.
   // 2. {"8888", "9", "1010"} will be actually be {NULL, "9", "1010"}.
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(
-    lists_column_view(*lists).offsets(),
+    cudf::lists_column_view(*lists).offsets(),
     offsets_col_t{0, 4, 5, 7, 10});  // 5-7 indicates that list row#2 is unsanitized.
 
   auto const result   = gather(lists->view(), {1, 2, 0, 3});
-  auto const expected = LCW<string_view>{{{"5"},
-                                          {},  // NULL.
-                                          {"1", "22", "3", "44"},
-                                          {{"", "9", "1010"}, null_at(0)}},
-                                         null_at(1)};
+  auto const expected = LCW<cudf::string_view>{{{"5"},
+                                                {},  // NULL.
+                                                {"1", "22", "3", "44"},
+                                                {{"", "9", "1010"}, null_at(0)}},
+                                               null_at(1)};
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, expected);
 
   // Ensure row#2 has been sanitized.
-  auto const results_lists_view = lists_column_view(*result);
+  auto const results_lists_view = cudf::lists_column_view(*result);
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(results_lists_view.offsets(), offsets_col_t{0, 1, 1, 5, 8}
                                  // 1-1 indicates that row#2 is sanitized.
   );
 
   // Ensure that "8888" has been sanitized, and stored as "".
-  auto const child_strings_view = strings_column_view(results_lists_view.child());
+  auto const child_strings_view = cudf::strings_column_view(results_lists_view.child());
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(child_strings_view.offsets(),
                                  offsets_col_t{0, 1, 2, 4, 5, 7, 7, 8, 12});
   EXPECT_TRUE(cudf::may_have_nonempty_nulls(*result));
@@ -402,18 +401,18 @@ TEST_F(PurgeNonEmptyNullsTest, StructOfList)
                            {8, 9, 10}},
                           no_nulls()};
       EXPECT_FALSE(cudf::has_nonempty_nulls(child));
-      return structs_column_wrapper{{child}, null_at(2)};
+      return cudf::test::structs_column_wrapper{{child}, null_at(2)};
     }()
       .release();
 
   EXPECT_TRUE(cudf::may_have_nonempty_nulls(*structs_input));
   EXPECT_TRUE(cudf::has_nonempty_nulls(*structs_input));
 
-  test_purge(structs_column_view{*structs_input});
+  test_purge(*structs_input);
 
   // At this point, even though the structs column has a null at index 2,
   // the child column has a non-empty list row at index 2: {6, 7}.
-  CUDF_TEST_EXPECT_COLUMNS_EQUAL(lists_column_view(structs_input->child(0)).child(),
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(cudf::lists_column_view(structs_input->child(0)).child(),
                                  values_col_t{{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, null_at(2)});
 
   {
@@ -426,11 +425,11 @@ TEST_F(PurgeNonEmptyNullsTest, StructOfList)
                            {{1, 2, 3, 4}, null_at(2)},
                            {8, 9, 10}},
                           null_at(1)};
-      return structs_column_wrapper{{child}, null_at(1)};
+      return cudf::test::structs_column_wrapper{{child}, null_at(1)};
     }();
 
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected_result);
-    auto const results_child = lists_column_view(result->child(0));
+    auto const results_child = cudf::lists_column_view(result->child(0));
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(results_child.offsets(), offsets_col_t{0, 1, 1, 5, 8});
     CUDF_TEST_EXPECT_COLUMNS_EQUAL(results_child.child(),
                                    values_col_t{{5, 1, 2, 3, 4, 8, 9, 10}, null_at(3)});
@@ -438,5 +437,3 @@ TEST_F(PurgeNonEmptyNullsTest, StructOfList)
     EXPECT_FALSE(cudf::has_nonempty_nulls(*result));
   }
 }
-
-}  // namespace cudf::test
diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp
index ba457e2738d..2f59a740454 100644
--- a/cpp/tests/io/parquet_test.cpp
+++ b/cpp/tests/io/parquet_test.cpp
@@ -2698,7 +2698,7 @@ TEST_F(ParquetReaderTest, UserBoundsWithNullsMixedTypes)
                                      c1_floats.release(),
                                      cudf::UNKNOWN_NULL_COUNT,
                                      cudf::test::detail::make_null_mask(valids, valids + num_rows));
-  auto c1  = cudf::purge_nonempty_nulls(static_cast<cudf::lists_column_view>(*_c1));
+  auto c1  = cudf::purge_nonempty_nulls(*_c1);
 
   // list<list<int>>
   auto c2 = make_parquet_list_list_col<int>(0, num_rows, 5, 8, true);
@@ -2727,7 +2727,7 @@ TEST_F(ParquetReaderTest, UserBoundsWithNullsMixedTypes)
                             string_col.release(),
                             cudf::UNKNOWN_NULL_COUNT,
                             cudf::test::detail::make_null_mask(valids, valids + num_rows));
-  auto c3_list = cudf::purge_nonempty_nulls(static_cast<cudf::lists_column_view>(*_c3_list));
+  auto c3_list = cudf::purge_nonempty_nulls(*_c3_list);
   cudf::test::fixed_width_column_wrapper<int> c3_ints(values, values + num_rows, valids);
   cudf::test::fixed_width_column_wrapper<float> c3_floats(values, values + num_rows, valids);
   std::vector<std::unique_ptr<cudf::column>> c3_children;
@@ -2735,7 +2735,7 @@ TEST_F(ParquetReaderTest, UserBoundsWithNullsMixedTypes)
   c3_children.push_back(c3_ints.release());
   c3_children.push_back(c3_floats.release());
   cudf::test::structs_column_wrapper _c3(std::move(c3_children), c3_valids);
-  auto c3 = cudf::purge_nonempty_nulls(static_cast<cudf::structs_column_view>(_c3));
+  auto c3 = cudf::purge_nonempty_nulls(_c3);
 
   // write it out
   cudf::table_view tbl({c0, *c1, *c2, *c3});

From ae101ccf3aeeb75ee248aa7c30a61171c54aca65 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Wed, 16 Nov 2022 22:49:42 +0000
Subject: [PATCH 181/202] Don't rely on GNU find in headers_test.sh (#12164)

`-printf` is a GNU find extension, so `headers_test.sh` fails on systems whose `find` comes from a BSD userland rather than from GNU findutils.

To get around this, use sed to obtain the effect of `-printf`.

Authors:
  - Lawrence Mitchell (https://github.com/wence-)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Ray Douglass (https://github.com/raydouglass)

URL: https://github.com/rapidsai/cudf/pull/12164
---
 ci/checks/headers_test.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci/checks/headers_test.sh b/ci/checks/headers_test.sh
index 502bdca0fa7..b859009a8c5 100755
--- a/ci/checks/headers_test.sh
+++ b/ci/checks/headers_test.sh
@@ -10,7 +10,7 @@ DIRNAMES="cudf cudf_test"
 
 # existence tests for lib${LIBNAME}
 for DIRNAME in ${DIRNAMES[@]}; do
-    HEADERS=`cd cpp && find include/${DIRNAME}/ -type f \( -iname "*.h" -o  -iname "*.hpp" \) -printf "        - test -f \\\$PREFIX/%p\n" | sort`
+    HEADERS=`cd cpp && find include/${DIRNAME} -type f \( -iname "*.h" -o  -iname "*.hpp" \) -print | sed 's|^|        - test -f $PREFIX/|' | sort`
     META_TESTS=`grep -E "test -f .*/include/${DIRNAME}/.*\.h(pp)?" conda/recipes/lib${LIBNAME}/meta.yaml | sort`
     HEADER_DIFF=`diff <(echo "$HEADERS") <(echo "$META_TESTS")`
     LIB_RETVAL=$?

From 6de2c4e7c98a40551924c9c8892ce59fc1b771cf Mon Sep 17 00:00:00 2001
From: Vukasin Milovanovic <vmilovanovic@nvidia.com>
Date: Wed, 16 Nov 2022 21:58:16 -0800
Subject: [PATCH 182/202] Fix issues when both `usecols` and `names` options
 are used in `read_csv` (#12018)

closes https://github.com/rapidsai/cudf/issues/8973
CSV reader has a few gaps in the logic for column selection and user specified column names:
1. Users cannot specify names for only the selected columns.
2. The reader fails in unpredictable ways when only a subset of column names is passed (without column selection).

This PR fixes the issues above. Users can now specify column names (the number of names can be fewer than the actual number of columns) or names of the columns selected via their indices (the number of names must match the number of indices). If selection via indices is used, the number of column names has to match either the actual number of columns or the number of selected columns.

Also fixed an error in a test that went unnoticed due to the issues above.

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Karthikeyan (https://github.com/karthikeyann)
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Nghia Truong (https://github.com/ttnghia)
  - https://github.com/nvdbaranec

URL: https://github.com/rapidsai/cudf/pull/12018
---
 cpp/src/io/csv/reader_impl.cu      |  93 +++++++++++++--------
 cpp/tests/io/csv_test.cpp          | 129 ++++++++++++++++++++++++++++-
 python/cudf/cudf/tests/test_csv.py |  38 +++++++++
 python/cudf/cudf/utils/ioutils.py  |  12 ++-
 4 files changed, 233 insertions(+), 39 deletions(-)

diff --git a/cpp/src/io/csv/reader_impl.cu b/cpp/src/io/csv/reader_impl.cu
index f812f272c25..075e9e2c965 100644
--- a/cpp/src/io/csv/reader_impl.cu
+++ b/cpp/src/io/csv/reader_impl.cu
@@ -676,32 +676,37 @@ table_with_metadata read_csv(cudf::io::datasource* source,
   auto const& data        = data_row_offsets.first;
   auto const& row_offsets = data_row_offsets.second;
 
-  // Exclude the end-of-data row from number of rows with actual data
-  auto num_records        = std::max(row_offsets.size(), 1ul) - 1;
-  auto column_flags       = std::vector<column_parse::flags>();
-  auto column_names       = std::vector<std::string>();
-  auto num_actual_columns = static_cast<int32_t>(reader_opts.get_names().size());
-  auto num_active_columns = num_actual_columns;
-
-  // Check if the user gave us a list of column names
-  if (not reader_opts.get_names().empty()) {
-    column_flags.resize(reader_opts.get_names().size(),
-                        column_parse::enabled | column_parse::inferred);
-    column_names = reader_opts.get_names();
-  } else {
-    column_names = get_column_names(
-      header, parse_opts.view(), reader_opts.get_header(), reader_opts.get_prefix());
-
-    num_actual_columns = num_active_columns = column_names.size();
-
-    column_flags.resize(num_actual_columns, column_parse::enabled | column_parse::inferred);
-
+  auto const unique_use_cols_indexes = std::set(reader_opts.get_use_cols_indexes().cbegin(),
+                                                reader_opts.get_use_cols_indexes().cend());
+
+  auto const detected_column_names =
+    get_column_names(header, parse_opts.view(), reader_opts.get_header(), reader_opts.get_prefix());
+  auto const opts_have_all_col_names =
+    not reader_opts.get_names().empty() and
+    (
+      // no data to detect (the number of) columns
+      detected_column_names.empty() or
+      // number of user specified names matches what is detected
+      reader_opts.get_names().size() == detected_column_names.size() or
+      // Columns are not selected by indices; read first reader_opts.get_names().size() columns
+      unique_use_cols_indexes.empty());
+  auto column_names = opts_have_all_col_names ? reader_opts.get_names() : detected_column_names;
+
+  auto const num_actual_columns = static_cast<int32_t>(column_names.size());
+  auto num_active_columns       = num_actual_columns;
+  auto column_flags             = std::vector<column_parse::flags>(
+    num_actual_columns, column_parse::enabled | column_parse::inferred);
+
+  // User did not pass column names to override names in the file
+  // Process names from the file to remove empty and duplicated strings
+  if (not opts_have_all_col_names) {
     std::vector<size_t> col_loop_order(column_names.size());
     auto unnamed_it = std::copy_if(
       thrust::make_counting_iterator<size_t>(0),
       thrust::make_counting_iterator<size_t>(column_names.size()),
       col_loop_order.begin(),
       [&column_names](auto col_idx) -> bool { return not column_names[col_idx].empty(); });
+
     // Rename empty column names to "Unnamed: col_index"
     std::copy_if(thrust::make_counting_iterator<size_t>(0),
                  thrust::make_counting_iterator<size_t>(column_names.size()),
@@ -756,24 +761,44 @@ table_with_metadata read_csv(cudf::io::datasource* source,
   }
 
   // User can specify which columns should be parsed
-  if (!reader_opts.get_use_cols_indexes().empty() || !reader_opts.get_use_cols_names().empty()) {
+  auto const unique_use_cols_names = std::unordered_set(reader_opts.get_use_cols_names().cbegin(),
+                                                        reader_opts.get_use_cols_names().cend());
+  auto const is_column_selection_used =
+    not unique_use_cols_names.empty() or not unique_use_cols_indexes.empty();
+
+  // Reset flags and output column count; columns will be reactivated based on the selection options
+  if (is_column_selection_used) {
     std::fill(column_flags.begin(), column_flags.end(), column_parse::disabled);
+    num_active_columns = 0;
+  }
+
+  // Column selection via column indexes
+  if (not unique_use_cols_indexes.empty()) {
+    // Users can pass names for the selected columns only, if selecting column by their indices
+    auto const are_opts_col_names_used =
+      not reader_opts.get_names().empty() and not opts_have_all_col_names;
+    CUDF_EXPECTS(not are_opts_col_names_used or
+                   reader_opts.get_names().size() == unique_use_cols_indexes.size(),
+                 "Specify names of all columns in the file, or names of all selected columns");
 
-    for (const auto index : reader_opts.get_use_cols_indexes()) {
+    for (auto const index : unique_use_cols_indexes) {
       column_flags[index] = column_parse::enabled | column_parse::inferred;
+      if (are_opts_col_names_used) {
+        column_names[index] = reader_opts.get_names()[num_active_columns];
+      }
+      ++num_active_columns;
     }
-    num_active_columns = std::unordered_set<int>(reader_opts.get_use_cols_indexes().begin(),
-                                                 reader_opts.get_use_cols_indexes().end())
-                           .size();
+  }
 
-    for (const auto& name : reader_opts.get_use_cols_names()) {
-      const auto it = std::find(column_names.begin(), column_names.end(), name);
-      if (it != column_names.end()) {
-        auto curr_it = it - column_names.begin();
-        if (column_flags[curr_it] == column_parse::disabled) {
-          column_flags[curr_it] = column_parse::enabled | column_parse::inferred;
-          num_active_columns++;
-        }
+  // Column selection via column names
+  if (not unique_use_cols_names.empty()) {
+    for (auto const& name : unique_use_cols_names) {
+      auto const it = std::find(column_names.cbegin(), column_names.cend(), name);
+      CUDF_EXPECTS(it != column_names.end(), "Nonexistent column selected");
+      auto const col_idx = std::distance(column_names.cbegin(), it);
+      if (column_flags[col_idx] == column_parse::disabled) {
+        column_flags[col_idx] = column_parse::enabled | column_parse::inferred;
+        ++num_active_columns;
       }
     }
   }
@@ -810,6 +835,8 @@ table_with_metadata read_csv(cudf::io::datasource* source,
   // Return empty table rather than exception if nothing to load
   if (num_active_columns == 0) { return {std::make_unique<table>(), {}}; }
 
+  // Exclude the end-of-data row from number of rows with actual data
+  auto const num_records  = std::max(row_offsets.size(), 1ul) - 1;
   auto const column_types = determine_column_types(
     reader_opts, parse_opts, column_names, data, row_offsets, num_records, column_flags, stream);
 
diff --git a/cpp/tests/io/csv_test.cpp b/cpp/tests/io/csv_test.cpp
index eeca87446ec..17fddffc93e 100644
--- a/cpp/tests/io/csv_test.cpp
+++ b/cpp/tests/io/csv_test.cpp
@@ -904,7 +904,7 @@ TEST_F(CsvReaderTest, Strings)
   auto filepath = temp_env->get_temp_dir() + "Strings.csv";
   {
     std::ofstream outfile(filepath, std::ofstream::out);
-    outfile << names[0] << ',' << names[1] << ',' << '\n';
+    outfile << names[0] << ',' << names[1] << '\n';
     outfile << "10,abc def ghi" << '\n';
     outfile << "20,\"jkl mno pqr\"" << '\n';
     outfile << "30,stu \"\"vwx\"\" yz" << '\n';
@@ -934,7 +934,7 @@ TEST_F(CsvReaderTest, StringsQuotes)
   auto filepath = temp_env->get_temp_dir() + "StringsQuotes.csv";
   {
     std::ofstream outfile(filepath, std::ofstream::out);
-    outfile << names[0] << ',' << names[1] << ',' << '\n';
+    outfile << names[0] << ',' << names[1] << '\n';
     outfile << "10,`abc,\ndef, ghi`" << '\n';
     outfile << "20,`jkl, ``mno``, pqr`" << '\n';
     outfile << "30,stu `vwx` yz" << '\n';
@@ -963,7 +963,7 @@ TEST_F(CsvReaderTest, StringsQuotesIgnored)
   auto filepath = temp_env->get_temp_dir() + "StringsQuotesIgnored.csv";
   {
     std::ofstream outfile(filepath, std::ofstream::out);
-    outfile << names[0] << ',' << names[1] << ',' << '\n';
+    outfile << names[0] << ',' << names[1] << '\n';
     outfile << "10,\"abcdef ghi\"" << '\n';
     outfile << "20,\"jkl \"\"mno\"\" pqr\"" << '\n';
     outfile << "30,stu \"vwx\" yz" << '\n';
@@ -2244,6 +2244,129 @@ TEST_F(CsvReaderTest, CsvDefaultOptionsWriteReadMatch)
   EXPECT_EQ(new_table_and_metadata.metadata.column_names[1], "1");
 }
 
+TEST_F(CsvReaderTest, UseColsValidation)
+{
+  const std::string buffer = "1,2,3";
+
+  const cudf::io::csv_reader_options idx_cnt_options =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()})
+      .names({"a", "b"})
+      .use_cols_indexes({0});
+  EXPECT_THROW(cudf::io::read_csv(idx_cnt_options), cudf::logic_error);
+
+  cudf::io::csv_reader_options unique_idx_cnt_options =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()})
+      .names({"a", "b"})
+      .use_cols_indexes({0, 0});
+  EXPECT_THROW(cudf::io::read_csv(unique_idx_cnt_options), cudf::logic_error);
+
+  cudf::io::csv_reader_options bad_name_options =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{buffer.c_str(), buffer.size()})
+      .names({"a", "b", "c"})
+      .use_cols_names({"nonexistent_name"});
+  EXPECT_THROW(cudf::io::read_csv(bad_name_options), cudf::logic_error);
+}
+
+TEST_F(CsvReaderTest, CropColumns)
+{
+  const std::string csv_in{"12,9., 10\n34,8., 20\n56,7., 30"};
+
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{csv_in.c_str(), csv_in.size()})
+      .dtypes(std::vector<data_type>{dtype<int32_t>(), dtype<float>()})
+      .names({"a", "b"})
+      .header(-1);
+  const auto result = cudf::io::read_csv(in_opts);
+
+  const auto result_table = result.tbl->view();
+  ASSERT_EQ(result_table.num_columns(), 2);
+  ASSERT_EQ(result_table.column(0).type(), data_type{type_id::INT32});
+  ASSERT_EQ(result_table.column(1).type(), data_type{type_id::FLOAT32});
+  expect_column_data_equal(std::vector<int32_t>{12, 34, 56}, result_table.column(0));
+  expect_column_data_equal(std::vector<float>{9., 8., 7.}, result_table.column(1));
+}
+
+TEST_F(CsvReaderTest, CropColumnsUseColsNames)
+{
+  std::string csv_in{"12,9., 10\n34,8., 20\n56,7., 30"};
+
+  cudf::io::csv_reader_options in_opts =
+    cudf::io::csv_reader_options::builder(cudf::io::source_info{csv_in.c_str(), csv_in.size()})
+      .dtypes(std::vector<data_type>{dtype<int32_t>(), dtype<float>()})
+      .names({"a", "b"})
+      .use_cols_names({"b"})
+      .header(-1);
+  auto result = cudf::io::read_csv(in_opts);
+
+  const auto result_table = result.tbl->view();
+  ASSERT_EQ(result_table.num_columns(), 1);
+  ASSERT_EQ(result_table.column(0).type(), data_type{type_id::FLOAT32});
+  expect_column_data_equal(std::vector<float>{9., 8., 7.}, result_table.column(0));
+}
+
+TEST_F(CsvReaderTest, ExtraColumns)
+{
+  std::string csv_in{"12,9., 10\n34,8., 20\n56,7., 30"};
+  {
+    cudf::io::csv_reader_options opts =
+      cudf::io::csv_reader_options::builder(cudf::io::source_info{csv_in.c_str(), csv_in.size()})
+        .names({"a", "b", "c", "d"})
+        .header(-1);
+    auto result = cudf::io::read_csv(opts);
+
+    const auto result_table = result.tbl->view();
+    ASSERT_EQ(result_table.num_columns(), 4);
+    ASSERT_EQ(result_table.column(3).type(), data_type{type_id::INT8});
+    ASSERT_EQ(result_table.column(3).null_count(), 3);
+  }
+  {
+    cudf::io::csv_reader_options with_dtypes_opts =
+      cudf::io::csv_reader_options::builder(cudf::io::source_info{csv_in.c_str(), csv_in.size()})
+        .names({"a", "b", "c", "d"})
+        .dtypes({dtype<int32_t>(), dtype<int32_t>(), dtype<int32_t>(), dtype<float>()})
+        .header(-1);
+    auto result = cudf::io::read_csv(with_dtypes_opts);
+
+    const auto result_table = result.tbl->view();
+    ASSERT_EQ(result_table.num_columns(), 4);
+    ASSERT_EQ(result_table.column(3).type(), data_type{type_id::FLOAT32});
+    ASSERT_EQ(result_table.column(3).null_count(), 3);
+  }
+}
+
+TEST_F(CsvReaderTest, ExtraColumnsUseCols)
+{
+  std::string csv_in{"12,9., 10\n34,8., 20\n56,7., 30"};
+
+  {
+    cudf::io::csv_reader_options in_opts =
+      cudf::io::csv_reader_options::builder(cudf::io::source_info{csv_in.c_str(), csv_in.size()})
+        .names({"a", "b", "c", "d"})
+        .use_cols_names({"b", "d"})
+        .header(-1);
+    auto result = cudf::io::read_csv(in_opts);
+
+    const auto result_table = result.tbl->view();
+    ASSERT_EQ(result_table.num_columns(), 2);
+    ASSERT_EQ(result_table.column(1).type(), data_type{type_id::INT8});
+    ASSERT_EQ(result_table.column(1).null_count(), 3);
+  }
+  {
+    cudf::io::csv_reader_options with_dtypes_opts =
+      cudf::io::csv_reader_options::builder(cudf::io::source_info{csv_in.c_str(), csv_in.size()})
+        .names({"a", "b", "c", "d"})
+        .use_cols_names({"b", "d"})
+        .dtypes({dtype<int32_t>(), dtype<int32_t>(), dtype<int32_t>(), dtype<cudf::string_view>()})
+        .header(-1);
+    auto result = cudf::io::read_csv(with_dtypes_opts);
+
+    const auto result_table = result.tbl->view();
+    ASSERT_EQ(result_table.num_columns(), 2);
+    ASSERT_EQ(result_table.column(1).type(), data_type{type_id::STRING});
+    ASSERT_EQ(result_table.column(1).null_count(), 3);
+  }
+}
+
 TEST_F(CsvReaderTest, EmptyColumns)
 {
   // First column only has empty fields. second column contains only "null" literals
diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py
index e85d404d2c4..7e62f63b0e2 100644
--- a/python/cudf/cudf/tests/test_csv.py
+++ b/python/cudf/cudf/tests/test_csv.py
@@ -2205,3 +2205,41 @@ def test_default_float_bitwidth_partial(default_float_bitwidth):
     )
     assert read["float1"].dtype == np.dtype(f"f{default_float_bitwidth//8}")
     assert read["float2"].dtype == np.dtype("f8")
+
+
+@pytest.mark.parametrize(
+    "usecols,names",
+    [
+        # selection using indices; only names of selected columns are specified
+        ([1, 2], ["b", "c"]),
+        # selection using indices; names of all columns are specified
+        ([1, 2], ["a", "b", "c"]),
+        # selection using indices; duplicates
+        ([2, 2], ["a", "b", "c"]),
+        # selection using indices; out of order
+        ([2, 1], ["a", "b", "c"]),
+        # selection using names
+        (["b"], ["a", "b", "c"]),
+        # selection using names; multiple columns
+        (["b", "c"], ["a", "b", "c"]),
+        # selection using names; duplicates
+        (["c", "c"], ["a", "b", "c"]),
+        # selection using names; out of order
+        (["c", "b"], ["a", "b", "c"]),
+    ],
+)
+def test_column_selection_plus_column_names(usecols, names):
+
+    lines = [
+        "num,datetime,text",
+        "123,2018-11-13T12:00:00,abc",
+        "456,2018-11-14T12:35:01,def",
+        "789,2018-11-15T18:02:59,ghi",
+    ]
+
+    buffer = "\n".join(lines) + "\n"
+
+    assert_eq(
+        pd.read_csv(StringIO(buffer), usecols=usecols, names=names),
+        cudf.read_csv(StringIO(buffer), usecols=usecols, names=names),
+    )
diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py
index 2c4b73666a5..96d4ea891b1 100644
--- a/python/cudf/cudf/utils/ioutils.py
+++ b/python/cudf/cudf/utils/ioutils.py
@@ -1029,16 +1029,22 @@
     the column names: if no names are passed, header=0;
     if column names are passed explicitly, header=None.
 names : list of str, default None
-    List of column names to be used.
+    List of column names to be used. Needs to include names of all columns in
+    the file, or names of all columns selected using `usecols` (only when
+    `usecols` holds integer indices). When `usecols` is not used to select
+    column indices, `names` can contain more names than there are columns in
+    the file. In this case the extra columns will only contain null rows.
 index_col : int, string or False, default None
     Column to use as the row labels of the DataFrame. Passing `index_col=False`
     explicitly disables index column inference and discards the last column.
 usecols : list of int or str, default None
     Returns subset of the columns given in the list. All elements must be
     either integer indices (column number) or strings that correspond to
-    column names
+    column names. When an integer index is passed for each name in the `names`
+    parameter, the names are interpreted as names in the output table, not as
+    names in the input file.
 prefix : str, default None
-    Prefix to add to column numbers when parsing without a header row
+    Prefix to add to column numbers when parsing without a header row.
 mangle_dupe_cols : boolean, default True
     Duplicate columns will be specified as 'X','X.1',...'X.N'.
 dtype : type, str, list of types, or dict of column -> type, default None

From aa13b955fa079dc1f1d526bb25a11bd3cb1576d8 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Thu, 17 Nov 2022 00:39:08 -0600
Subject: [PATCH 183/202] Support `upper` and `lower` in `strings_udf` (#12099)

This PR adds support for the following two functions in `strings_udf`:

- `str.upper()`
- `str.lower()`

Part of https://github.com/rapidsai/cudf/issues/9639

Authors:
  - https://github.com/brandon-b-miller
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Lawrence Mitchell (https://github.com/wence-)
  - David Wendt (https://github.com/davidwendt)

URL: https://github.com/rapidsai/cudf/pull/12099
---
 python/cudf/cudf/core/udf/strings_lowering.py | 58 +++++++----
 python/cudf/cudf/core/udf/strings_typing.py   | 14 ++-
 python/cudf/cudf/tests/test_udf_masked_ops.py | 16 ++++
 .../strings_udf/cpp/src/strings/udf/shim.cu   | 64 +++++++++++--
 .../cpp/src/strings/udf/udf_apis.cu           |  3 +-
 .../strings_udf/_lib/cpp/strings_udf.pxd      |  4 +-
 .../strings_udf/strings_udf/_lib/tables.pyx   | 16 +++-
 python/strings_udf/strings_udf/_typing.py     |  8 ++
 python/strings_udf/strings_udf/lowering.py    | 95 ++++++++++++++++++-
 .../strings_udf/tests/test_string_udfs.py     | 14 +++
 10 files changed, 254 insertions(+), 38 deletions(-)

diff --git a/python/cudf/cudf/core/udf/strings_lowering.py b/python/cudf/cudf/core/udf/strings_lowering.py
index fdfd013bad7..465866cdd55 100644
--- a/python/cudf/cudf/core/udf/strings_lowering.py
+++ b/python/cudf/cudf/core/udf/strings_lowering.py
@@ -22,11 +22,13 @@
     istitle_impl,
     isupper_impl,
     len_impl,
+    lower_impl,
     lstrip_impl,
     rfind_impl,
     rstrip_impl,
     startswith_impl,
     strip_impl,
+    upper_impl,
 )
 
 from cudf.core.udf.masked_typing import MaskedType
@@ -82,25 +84,6 @@ def masked_binary_func_impl(context, builder, sig, args):
     )
 
 
-create_binary_string_func("MaskedType.strip", strip_impl, udf_string)
-
-create_binary_string_func("MaskedType.lstrip", lstrip_impl, udf_string)
-
-create_binary_string_func("MaskedType.rstrip", rstrip_impl, udf_string)
-
-
-create_binary_string_func(
-    "MaskedType.startswith",
-    startswith_impl,
-    types.boolean,
-)
-create_binary_string_func("MaskedType.endswith", endswith_impl, types.boolean)
-create_binary_string_func("MaskedType.find", find_impl, size_type)
-create_binary_string_func("MaskedType.rfind", rfind_impl, size_type)
-create_binary_string_func("MaskedType.count", count_impl, size_type)
-create_binary_string_func(operator.contains, contains_impl, types.boolean)
-
-
 def create_masked_unary_identifier_func(op, cuda_func):
     """
     Provide a wrapper around numba's low-level extension API which
@@ -127,6 +110,41 @@ def masked_unary_func_impl(context, builder, sig, args):
     cuda_lower(op, MaskedType(string_view))(masked_unary_func_impl)
 
 
+def create_masked_upper_or_lower(op, cuda_func):
+    def upper_or_lower_impl(context, builder, sig, args):
+        ret = cgutils.create_struct_proxy(sig.return_type)(context, builder)
+        masked_str = cgutils.create_struct_proxy(sig.args[0])(
+            context, builder, value=args[0]
+        )
+
+        result = cuda_func(
+            context,
+            builder,
+            udf_string(string_view),
+            (masked_str.value,),
+        )
+        ret.value = result
+        ret.valid = masked_str.valid
+        return ret._getvalue()
+
+    cuda_lower(op, MaskedType(string_view))(upper_or_lower_impl)
+
+
+create_binary_string_func("MaskedType.strip", strip_impl, udf_string)
+create_binary_string_func("MaskedType.lstrip", lstrip_impl, udf_string)
+create_binary_string_func("MaskedType.rstrip", rstrip_impl, udf_string)
+create_binary_string_func(
+    "MaskedType.startswith",
+    startswith_impl,
+    types.boolean,
+)
+create_binary_string_func("MaskedType.endswith", endswith_impl, types.boolean)
+create_binary_string_func("MaskedType.find", find_impl, size_type)
+create_binary_string_func("MaskedType.rfind", rfind_impl, size_type)
+create_binary_string_func("MaskedType.count", count_impl, size_type)
+create_binary_string_func(operator.contains, contains_impl, types.boolean)
+
+
 create_masked_unary_identifier_func("MaskedType.isalnum", isalnum_impl)
 create_masked_unary_identifier_func("MaskedType.isalpha", isalpha_impl)
 create_masked_unary_identifier_func("MaskedType.isdigit", isdigit_impl)
@@ -135,3 +153,5 @@ def masked_unary_func_impl(context, builder, sig, args):
 create_masked_unary_identifier_func("MaskedType.isspace", isspace_impl)
 create_masked_unary_identifier_func("MaskedType.isdecimal", isdecimal_impl)
 create_masked_unary_identifier_func("MaskedType.istitle", istitle_impl)
+create_masked_upper_or_lower("MaskedType.upper", upper_impl)
+create_masked_upper_or_lower("MaskedType.lower", lower_impl)
diff --git a/python/cudf/cudf/core/udf/strings_typing.py b/python/cudf/cudf/core/udf/strings_typing.py
index e8a35c12f71..87500cba564 100644
--- a/python/cudf/cudf/core/udf/strings_typing.py
+++ b/python/cudf/cudf/core/udf/strings_typing.py
@@ -14,6 +14,7 @@
     int_binary_funcs,
     size_type,
     string_return_attrs,
+    string_unary_funcs,
     string_view,
     udf_string,
 )
@@ -123,7 +124,7 @@ def attr(self, mod):
     return attr
 
 
-def create_masked_identifier_attr(attrname):
+def create_masked_unary_attr(attrname, retty):
     """
     Helper function wrapping numba's low level extension API. Provides
     the boilerplate needed to register a unary function of a masked
@@ -134,7 +135,7 @@ class MaskedStringViewIdentifierAttr(AbstractTemplate):
         key = attrname
 
         def generic(self, args, kws):
-            return nb_signature(MaskedType(types.boolean), recvr=self.this)
+            return nb_signature(MaskedType(retty), recvr=self.this)
 
     def attr(self, mod):
         return types.BoundFunction(
@@ -195,7 +196,14 @@ def resolve_valid(self, mod):
     setattr(
         MaskedStringViewAttrs,
         f"resolve_{func}",
-        create_masked_identifier_attr(f"MaskedType.{func}"),
+        create_masked_unary_attr(f"MaskedType.{func}", types.boolean),
+    )
+
+for func in string_unary_funcs:
+    setattr(
+        MaskedStringViewAttrs,
+        f"resolve_{func}",
+        create_masked_unary_attr(f"MaskedType.{func}", udf_string),
     )
 
 cuda_decl_registry.register_attr(MaskedStringViewAttrs)
diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py
index fbe6b3f8888..72abc8e9f87 100644
--- a/python/cudf/cudf/tests/test_udf_masked_ops.py
+++ b/python/cudf/cudf/tests/test_udf_masked_ops.py
@@ -903,6 +903,22 @@ def func(row):
     run_masked_udf_test(func, str_udf_data, check_dtype=False)
 
 
+@string_udf_test
+def test_string_udf_upper(str_udf_data):
+    def func(row):
+        return row["str_col"].upper()
+
+    run_masked_udf_test(func, str_udf_data, check_dtype=False)
+
+
+@string_udf_test
+def test_string_udf_lower(str_udf_data):
+    def func(row):
+        return row["str_col"].lower()
+
+    run_masked_udf_test(func, str_udf_data, check_dtype=False)
+
+
 @string_udf_test
 @pytest.mark.parametrize("concat_char", ["1", "a", "12", " ", "", ".", "@"])
 def test_string_udf_concat(str_udf_data, concat_char):
diff --git a/python/strings_udf/cpp/src/strings/udf/shim.cu b/python/strings_udf/cpp/src/strings/udf/shim.cu
index 8fc158d7eb7..c5a446c9518 100644
--- a/python/strings_udf/cpp/src/strings/udf/shim.cu
+++ b/python/strings_udf/cpp/src/strings/udf/shim.cu
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include <cudf/strings/udf/case.cuh>
 #include <cudf/strings/udf/char_types.cuh>
 #include <cudf/strings/udf/search.cuh>
 #include <cudf/strings/udf/starts_with.cuh>
@@ -128,7 +129,7 @@ extern "C" __device__ int lt(bool* nb_retval, void const* str, void const* rhs)
   return 0;
 }
 
-extern "C" __device__ int pyislower(bool* nb_retval, void const* str, std::int64_t chars_table)
+extern "C" __device__ int pyislower(bool* nb_retval, void const* str, std::uintptr_t chars_table)
 {
   auto str_view = reinterpret_cast<cudf::string_view const*>(str);
 
@@ -137,7 +138,7 @@ extern "C" __device__ int pyislower(bool* nb_retval, void const* str, std::int64
   return 0;
 }
 
-extern "C" __device__ int pyisupper(bool* nb_retval, void const* str, std::int64_t chars_table)
+extern "C" __device__ int pyisupper(bool* nb_retval, void const* str, std::uintptr_t chars_table)
 {
   auto str_view = reinterpret_cast<cudf::string_view const*>(str);
 
@@ -146,7 +147,7 @@ extern "C" __device__ int pyisupper(bool* nb_retval, void const* str, std::int64
   return 0;
 }
 
-extern "C" __device__ int pyisspace(bool* nb_retval, void const* str, std::int64_t chars_table)
+extern "C" __device__ int pyisspace(bool* nb_retval, void const* str, std::uintptr_t chars_table)
 {
   auto str_view = reinterpret_cast<cudf::string_view const*>(str);
 
@@ -155,7 +156,7 @@ extern "C" __device__ int pyisspace(bool* nb_retval, void const* str, std::int64
   return 0;
 }
 
-extern "C" __device__ int pyisdecimal(bool* nb_retval, void const* str, std::int64_t chars_table)
+extern "C" __device__ int pyisdecimal(bool* nb_retval, void const* str, std::uintptr_t chars_table)
 {
   auto str_view = reinterpret_cast<cudf::string_view const*>(str);
 
@@ -164,7 +165,7 @@ extern "C" __device__ int pyisdecimal(bool* nb_retval, void const* str, std::int
   return 0;
 }
 
-extern "C" __device__ int pyisnumeric(bool* nb_retval, void const* str, std::int64_t chars_table)
+extern "C" __device__ int pyisnumeric(bool* nb_retval, void const* str, std::uintptr_t chars_table)
 {
   auto str_view = reinterpret_cast<cudf::string_view const*>(str);
 
@@ -173,7 +174,7 @@ extern "C" __device__ int pyisnumeric(bool* nb_retval, void const* str, std::int
   return 0;
 }
 
-extern "C" __device__ int pyisdigit(bool* nb_retval, void const* str, std::int64_t chars_table)
+extern "C" __device__ int pyisdigit(bool* nb_retval, void const* str, std::uintptr_t chars_table)
 {
   auto str_view = reinterpret_cast<cudf::string_view const*>(str);
 
@@ -182,7 +183,7 @@ extern "C" __device__ int pyisdigit(bool* nb_retval, void const* str, std::int64
   return 0;
 }
 
-extern "C" __device__ int pyisalnum(bool* nb_retval, void const* str, std::int64_t chars_table)
+extern "C" __device__ int pyisalnum(bool* nb_retval, void const* str, std::uintptr_t chars_table)
 {
   auto str_view = reinterpret_cast<cudf::string_view const*>(str);
 
@@ -191,7 +192,7 @@ extern "C" __device__ int pyisalnum(bool* nb_retval, void const* str, std::int64
   return 0;
 }
 
-extern "C" __device__ int pyisalpha(bool* nb_retval, void const* str, std::int64_t chars_table)
+extern "C" __device__ int pyisalpha(bool* nb_retval, void const* str, std::uintptr_t chars_table)
 {
   auto str_view = reinterpret_cast<cudf::string_view const*>(str);
 
@@ -200,7 +201,7 @@ extern "C" __device__ int pyisalpha(bool* nb_retval, void const* str, std::int64
   return 0;
 }
 
-extern "C" __device__ int pyistitle(bool* nb_retval, void const* str, std::int64_t chars_table)
+extern "C" __device__ int pyistitle(bool* nb_retval, void const* str, std::uintptr_t chars_table)
 {
   auto str_view = reinterpret_cast<cudf::string_view const*>(str);
 
@@ -270,6 +271,51 @@ extern "C" __device__ int rstrip(int* nb_retval,
 
   return 0;
 }
+extern "C" __device__ int upper(int* nb_retval,
+                                void* udf_str,
+                                void const* st,
+                                std::uintptr_t flags_table,
+                                std::uintptr_t cases_table,
+                                std::uintptr_t special_table)
+{
+  auto udf_str_ptr = new (udf_str) udf_string;
+  auto st_ptr      = reinterpret_cast<cudf::string_view const*>(st);
+
+  auto flags_table_ptr =
+    reinterpret_cast<cudf::strings::detail::character_flags_table_type*>(flags_table);
+  auto cases_table_ptr =
+    reinterpret_cast<cudf::strings::detail::character_cases_table_type*>(cases_table);
+  auto special_table_ptr =
+    reinterpret_cast<cudf::strings::detail::special_case_mapping*>(special_table);
+
+  cudf::strings::udf::chars_tables tables{flags_table_ptr, cases_table_ptr, special_table_ptr};
+
+  *udf_str_ptr = to_upper(tables, *st_ptr);
+
+  return 0;
+}
+
+extern "C" __device__ int lower(int* nb_retval,
+                                void* udf_str,
+                                void const* st,
+                                std::uintptr_t flags_table,
+                                std::uintptr_t cases_table,
+                                std::uintptr_t special_table)
+{
+  auto udf_str_ptr = new (udf_str) udf_string;
+  auto st_ptr      = reinterpret_cast<cudf::string_view const*>(st);
+
+  auto flags_table_ptr =
+    reinterpret_cast<cudf::strings::detail::character_flags_table_type*>(flags_table);
+  auto cases_table_ptr =
+    reinterpret_cast<cudf::strings::detail::character_cases_table_type*>(cases_table);
+  auto special_table_ptr =
+    reinterpret_cast<cudf::strings::detail::special_case_mapping*>(special_table);
+
+  cudf::strings::udf::chars_tables tables{flags_table_ptr, cases_table_ptr, special_table_ptr};
+  *udf_str_ptr = to_lower(tables, *st_ptr);
+  return 0;
+}
 
 extern "C" __device__ int concat(int* nb_retval, void* udf_str, void* const* lhs, void* const* rhs)
 {
diff --git a/python/strings_udf/cpp/src/strings/udf/udf_apis.cu b/python/strings_udf/cpp/src/strings/udf/udf_apis.cu
index b4d5014d9e0..3e6491e32e7 100644
--- a/python/strings_udf/cpp/src/strings/udf/udf_apis.cu
+++ b/python/strings_udf/cpp/src/strings/udf/udf_apis.cu
@@ -42,7 +42,8 @@ namespace {
 struct udf_string_to_string_view_transform_fn {
   __device__ cudf::string_view operator()(cudf::strings::udf::udf_string const& dstr)
   {
-    return cudf::string_view{dstr.data(), dstr.size_bytes()};
+    return dstr.data() == nullptr ? cudf::string_view{}
+                                  : cudf::string_view{dstr.data(), dstr.size_bytes()};
   }
 };
 
diff --git a/python/strings_udf/strings_udf/_lib/cpp/strings_udf.pxd b/python/strings_udf/strings_udf/_lib/cpp/strings_udf.pxd
index 7b90760abcc..b3bf6465db6 100644
--- a/python/strings_udf/strings_udf/_lib/cpp/strings_udf.pxd
+++ b/python/strings_udf/strings_udf/_lib/cpp/strings_udf.pxd
@@ -1,6 +1,6 @@
 # Copyright (c) 2022, NVIDIA CORPORATION.
 
-from libc.stdint cimport uint8_t
+from libc.stdint cimport uint8_t, uint16_t
 from libcpp.memory cimport unique_ptr
 from libcpp.string cimport string
 from libcpp.vector cimport vector
@@ -28,3 +28,5 @@ cdef extern from "cudf/strings/udf/udf_apis.hpp"  namespace \
 cdef extern from "cudf/strings/detail/char_tables.hpp" namespace \
         "cudf::strings::detail" nogil:
     cdef const uint8_t* get_character_flags_table() except +
+    cdef const uint16_t* get_character_cases_table() except +
+    cdef const void* get_special_case_mapping_table() except +
diff --git a/python/strings_udf/strings_udf/_lib/tables.pyx b/python/strings_udf/strings_udf/_lib/tables.pyx
index 5443364a4a7..6442a34f63f 100644
--- a/python/strings_udf/strings_udf/_lib/tables.pyx
+++ b/python/strings_udf/strings_udf/_lib/tables.pyx
@@ -1,9 +1,11 @@
 # Copyright (c) 2022, NVIDIA CORPORATION.
 
-from libc.stdint cimport uint8_t, uintptr_t
+from libc.stdint cimport uint8_t, uint16_t, uintptr_t
 
 from strings_udf._lib.cpp.strings_udf cimport (
+    get_character_cases_table as cpp_get_character_cases_table,
     get_character_flags_table as cpp_get_character_flags_table,
+    get_special_case_mapping_table as cpp_get_special_case_mapping_table,
 )
 
 import numpy as np
@@ -11,4 +13,14 @@ import numpy as np
 
 def get_character_flags_table_ptr():
     cdef const uint8_t* tbl_ptr = cpp_get_character_flags_table()
-    return np.int64(<uintptr_t>tbl_ptr)
+    return np.uintp(<uintptr_t>tbl_ptr)
+
+
+def get_character_cases_table_ptr():
+    cdef const uint16_t* tbl_ptr = cpp_get_character_cases_table()
+    return np.uintp(<uintptr_t>tbl_ptr)
+
+
+def get_special_case_mapping_table_ptr():
+    cdef const void* tbl_ptr = cpp_get_special_case_mapping_table()
+    return np.uintp(<uintptr_t>tbl_ptr)
diff --git a/python/strings_udf/strings_udf/_typing.py b/python/strings_udf/strings_udf/_typing.py
index b678db88b95..3fadf030ce9 100644
--- a/python/strings_udf/strings_udf/_typing.py
+++ b/python/strings_udf/strings_udf/_typing.py
@@ -234,6 +234,7 @@ def resolve_count(self, mod):
     "isnumeric",
     "istitle",
 ]
+string_unary_funcs = ["upper", "lower"]
 string_return_attrs = ["strip", "lstrip", "rstrip"]
 
 for func in bool_binary_funcs:
@@ -263,4 +264,11 @@ def resolve_count(self, mod):
         create_identifier_attr(func, types.boolean),
     )
 
+for func in string_unary_funcs:
+    setattr(
+        StringViewAttrs,
+        f"resolve_{func}",
+        create_identifier_attr(func, udf_string),
+    )
+
 cuda_decl_registry.register_attr(StringViewAttrs)
diff --git a/python/strings_udf/strings_udf/lowering.py b/python/strings_udf/strings_udf/lowering.py
index 9e34b61e6da..cca3066a844 100644
--- a/python/strings_udf/strings_udf/lowering.py
+++ b/python/strings_udf/strings_udf/lowering.py
@@ -13,10 +13,16 @@
     registry as cuda_lowering_registry,
 )
 
-from strings_udf._lib.tables import get_character_flags_table_ptr
+from strings_udf._lib.tables import (
+    get_character_cases_table_ptr,
+    get_character_flags_table_ptr,
+    get_special_case_mapping_table_ptr,
+)
 from strings_udf._typing import size_type, string_view, udf_string
 
 character_flags_table_ptr = get_character_flags_table_ptr()
+character_cases_table_ptr = get_character_cases_table_ptr()
+special_case_mapping_table_ptr = get_special_case_mapping_table_ptr()
 
 _STR_VIEW_PTR = types.CPointer(string_view)
 _UDF_STRING_PTR = types.CPointer(udf_string)
@@ -76,6 +82,19 @@ def _declare_strip_func(name):
 )
 
 
+def _declare_upper_or_lower(func):
+    return cuda.declare_device(
+        func,
+        types.void(
+            _UDF_STRING_PTR,
+            _STR_VIEW_PTR,
+            types.uintp,
+            types.uintp,
+            types.uintp,
+        ),
+    )
+
+
 _string_view_isdigit = _declare_bool_str_int_func("pyisdigit")
 _string_view_isalnum = _declare_bool_str_int_func("pyisalnum")
 _string_view_isalpha = _declare_bool_str_int_func("pyisalpha")
@@ -85,6 +104,8 @@ def _declare_strip_func(name):
 _string_view_isupper = _declare_bool_str_int_func("pyisupper")
 _string_view_islower = _declare_bool_str_int_func("pyislower")
 _string_view_istitle = _declare_bool_str_int_func("pyistitle")
+_string_view_upper = _declare_upper_or_lower("upper")
+_string_view_lower = _declare_upper_or_lower("lower")
 
 
 _string_view_count = cuda.declare_device(
@@ -335,12 +356,12 @@ def id_func_impl(context, builder, sig, args):
             # must be resolved at runtime after context initialization,
             # therefore cannot be a global variable
             tbl_ptr = context.get_constant(
-                types.int64, character_flags_table_ptr
+                types.uintp, character_flags_table_ptr
             )
             result = context.compile_internal(
                 builder,
                 cuda_func,
-                nb_signature(types.boolean, _STR_VIEW_PTR, types.int64),
+                nb_signature(types.boolean, _STR_VIEW_PTR, types.uintp),
                 (str_ptr, tbl_ptr),
             )
 
@@ -351,6 +372,74 @@ def id_func_impl(context, builder, sig, args):
     return deco
 
 
+def create_upper_or_lower(id_func):
+    """
+    Provide a wrapper around numba's low-level extension API which
+    produces the boilerplate needed to implement either the upper
+    or lower attrs of a string view.
+    """
+
+    def deco(cuda_func):
+        @cuda_lower(id_func, string_view)
+        def id_func_impl(context, builder, sig, args):
+            str_ptr = builder.alloca(args[0].type)
+            builder.store(args[0], str_ptr)
+
+            # Lookup table required for conversion functions
+            # must be resolved at runtime after context initialization,
+            # therefore cannot be a global variable
+            flags_tbl_ptr = context.get_constant(
+                types.uintp, character_flags_table_ptr
+            )
+            cases_tbl_ptr = context.get_constant(
+                types.uintp, character_cases_table_ptr
+            )
+            special_tbl_ptr = context.get_constant(
+                types.uintp, special_case_mapping_table_ptr
+            )
+            udf_str_ptr = builder.alloca(
+                default_manager[udf_string].get_value_type()
+            )
+
+            _ = context.compile_internal(
+                builder,
+                cuda_func,
+                types.void(
+                    _UDF_STRING_PTR,
+                    _STR_VIEW_PTR,
+                    types.uintp,
+                    types.uintp,
+                    types.uintp,
+                ),
+                (
+                    udf_str_ptr,
+                    str_ptr,
+                    flags_tbl_ptr,
+                    cases_tbl_ptr,
+                    special_tbl_ptr,
+                ),
+            )
+
+            result = cgutils.create_struct_proxy(udf_string)(
+                context, builder, value=builder.load(udf_str_ptr)
+            )
+            return result._getvalue()
+
+        return id_func_impl
+
+    return deco
+
+
+@create_upper_or_lower("StringView.upper")
+def upper_impl(result, st, flags, cases, special):
+    return _string_view_upper(result, st, flags, cases, special)
+
+
+@create_upper_or_lower("StringView.lower")
+def lower_impl(result, st, flags, cases, special):
+    return _string_view_lower(result, st, flags, cases, special)
+
+
 @create_unary_identifier_func("StringView.isdigit")
 def isdigit_impl(st, tbl):
     return _string_view_isdigit(st, tbl)
diff --git a/python/strings_udf/strings_udf/tests/test_string_udfs.py b/python/strings_udf/strings_udf/tests/test_string_udfs.py
index 49663ee02ec..02c3a8b8c12 100644
--- a/python/strings_udf/strings_udf/tests/test_string_udfs.py
+++ b/python/strings_udf/strings_udf/tests/test_string_udfs.py
@@ -304,6 +304,20 @@ def func(st):
     run_udf_test(data, func, "str")
 
 
+def test_string_udf_upper(data):
+    def func(st):
+        return st.upper()
+
+    run_udf_test(data, func, "str")
+
+
+def test_string_udf_lower(data):
+    def func(st):
+        return st.lower()
+
+    run_udf_test(data, func, "str")
+
+
 @pytest.mark.parametrize("concat_char", ["1", "a", "12", " ", "", ".", "@"])
 def test_string_udf_concat(data, concat_char):
     def func(st):

From 2f2685f1fb3d2d135019e67b0cd6b2c963a6b59f Mon Sep 17 00:00:00 2001
From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Thu, 17 Nov 2022 14:37:53 -0600
Subject: [PATCH 184/202] Allow setting malloc heap size in string udfs
 (#12094)

Adds a mechanism for setting the default cuda malloc heap size for string UDFs, with 2gb default.

Authors:
  - https://github.com/brandon-b-miller

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/12094
---
 python/cudf/cudf/core/udf/__init__.py      | 17 ++++++++++++++-
 python/strings_udf/strings_udf/__init__.py | 25 ++++++++++++++++++++++
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/udf/__init__.py b/python/cudf/cudf/core/udf/__init__.py
index 8421d763167..8092207e037 100644
--- a/python/cudf/cudf/core/udf/__init__.py
+++ b/python/cudf/cudf/core/udf/__init__.py
@@ -1,5 +1,7 @@
 # Copyright (c) 2022, NVIDIA CORPORATION.
 
+from functools import lru_cache
+
 from numba import types
 from numba.cuda.cudaimpl import lower as cuda_lower
 
@@ -21,6 +23,8 @@
 )
 _STRING_UDFS_ENABLED = False
 cudf_str_dtype = dtype(str)
+
+
 try:
     import strings_udf
     from strings_udf import ptxpath
@@ -47,7 +51,18 @@
         utils.JIT_SUPPORTED_TYPES |= STRING_TYPES
         _supported_masked_types |= {string_view, udf_string}
 
-        utils.launch_arg_getters[cudf_str_dtype] = column_to_string_view_array
+        @lru_cache(maxsize=None)
+        def set_initial_malloc_heap_size():
+            strings_udf.set_malloc_heap_size()
+
+        def column_to_string_view_array_init_heap(col):
+            # lazily allocate heap only when a string needs to be returned
+            set_initial_malloc_heap_size()
+            return column_to_string_view_array(col)
+
+        utils.launch_arg_getters[
+            cudf_str_dtype
+        ] = column_to_string_view_array_init_heap
         utils.output_col_getters[cudf_str_dtype] = column_from_udf_string_array
         utils.masked_array_types[cudf_str_dtype] = string_view
         row_function.itemsizes[cudf_str_dtype] = string_view.size_bytes
diff --git a/python/strings_udf/strings_udf/__init__.py b/python/strings_udf/strings_udf/__init__.py
index 2222fb72009..bf13b79ab90 100644
--- a/python/strings_udf/strings_udf/__init__.py
+++ b/python/strings_udf/strings_udf/__init__.py
@@ -3,6 +3,7 @@
 import os
 
 from cubinlinker.patch import _numba_version_ok, get_logger, new_patched_linker
+from cuda import cudart
 from numba import cuda
 from numba.cuda.cudadrv.driver import Linker
 from ptxcompiler.patch import NO_DRIVER, safe_get_versions
@@ -87,6 +88,30 @@ def _get_ptx_file():
         return regular_result[1]
 
 
+# Maximum size of a string column is 2 GiB
+_STRINGS_UDF_DEFAULT_HEAP_SIZE = int(  # os.environ values are always str
+    os.environ.get("STRINGS_UDF_HEAP_SIZE", 2**31)
+)
+heap_size = 0
+
+
+def set_malloc_heap_size(size=None):
+    """
+    Heap size control for strings_udf, size in bytes.
+
+    ``size=None`` uses the default (STRINGS_UDF_HEAP_SIZE, else 2 GiB). Raises
+    RuntimeError when cudaDeviceSetLimit returns a non-zero status.
+    """
+    global heap_size
+    size = _STRINGS_UDF_DEFAULT_HEAP_SIZE if size is None else size
+    if size != heap_size:
+        limit = cudart.cudaLimit.cudaLimitMallocHeapSize
+        (ret,) = cudart.cudaDeviceSetLimit(limit, size)
+        if ret.value != 0:
+            raise RuntimeError("Unable to set cudaMalloc heap size")
+        heap_size = size
+
+
 ptxpath = None
 versions = safe_get_versions()
 if versions != NO_DRIVER:

From db0d045383b8251be45adb06657cffc42bd103dd Mon Sep 17 00:00:00 2001
From: Robert Maynard <rmaynard@nvidia.com>
Date: Thu, 17 Nov 2022 18:50:33 -0500
Subject: [PATCH 185/202] Ensure dlpack include is provided to cudf interop lib
 (#12139)

As brought up in #12081, it is possible to have Python build failures because no include paths to dlpack are provided. This fixes the issue by ensuring that DLPACK_INCLUDE_DIR is propagated down to the interop target.

We don't run into this issue with conda, since the dlpack headers are inside the conda include dir which is already being provided to the compiler.

Authors:
  - Robert Maynard (https://github.com/robertmaynard)
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/12139
---
 cpp/cmake/thirdparty/get_dlpack.cmake | 3 ++-
 python/cudf/CMakeLists.txt            | 6 ++++++
 python/cudf/cudf/_lib/CMakeLists.txt  | 5 +++++
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/cpp/cmake/thirdparty/get_dlpack.cmake b/cpp/cmake/thirdparty/get_dlpack.cmake
index 252d50c7af8..65b5f4ff2eb 100644
--- a/cpp/cmake/thirdparty/get_dlpack.cmake
+++ b/cpp/cmake/thirdparty/get_dlpack.cmake
@@ -1,5 +1,5 @@
 # =============================================================================
-# Copyright (c) 2020-2021, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 # in compliance with the License. You may obtain a copy of the License at
@@ -15,6 +15,7 @@
 # This function finds dlpack and sets any additional necessary environment variables.
 function(find_and_configure_dlpack VERSION)
 
+  include(${rapids-cmake-dir}/find/generate_module.cmake)
   rapids_find_generate_module(DLPACK HEADER_NAMES dlpack.h)
 
   rapids_cpm_find(
diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt
index 8a3224237b6..1c8bef42e4c 100644
--- a/python/cudf/CMakeLists.txt
+++ b/python/cudf/CMakeLists.txt
@@ -51,6 +51,12 @@ if(FIND_CUDF_CPP)
   endif()
 
   find_package(cudf ${cudf_version} REQUIRED)
+
+  # an installed version of libcudf doesn't provide the dlpack headers so we need to download dlpack
+  # for the interop.pyx
+  include(rapids-cpm)
+  rapids_cpm_init()
+  include(../../cpp/cmake/thirdparty/get_dlpack.cmake)
 else()
   set(cudf_FOUND OFF)
 endif()
diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt
index df17b8f2032..d58bdee02ad 100644
--- a/python/cudf/cudf/_lib/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/CMakeLists.txt
@@ -69,6 +69,11 @@ foreach(target IN LISTS targets_using_numpy)
   target_include_directories(${target} PRIVATE "${Python_NumPy_INCLUDE_DIRS}")
 endforeach()
 
+set(targets_using_dlpack interop)
+foreach(target IN LISTS targets_using_dlpack)
+  target_include_directories(${target} PRIVATE "${DLPACK_INCLUDE_DIR}")
+endforeach()
+
 add_subdirectory(io)
 add_subdirectory(nvtext)
 add_subdirectory(strings)

From ec8888c7ccb741f38ba3980b8bb1c39f9bde0f96 Mon Sep 17 00:00:00 2001
From: vuule <vmilovanovic@nvidia.com>
Date: Thu, 17 Nov 2022 18:23:45 -0800
Subject: [PATCH 186/202] fix selection of original vs compressed blocks,
 padding

---
 cpp/src/io/orc/stripe_enc.cu | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu
index 013761343d3..712dbf35674 100644
--- a/cpp/src/io/orc/stripe_enc.cu
+++ b/cpp/src/io/orc/stripe_enc.cu
@@ -1180,7 +1180,7 @@ __global__ void __launch_bounds__(256)
   for (uint32_t b = t; b < num_blocks; b += 256) {
     uint32_t blk_size = min(comp_blk_size, ss.stream_size - min(b * comp_blk_size, ss.stream_size));
     inputs[ss.first_block + b]  = {src + b * comp_blk_size, blk_size};
-    auto const dst_offset       = b * (padded_block_header_size + padded_comp_block_size);
+    auto const dst_offset       = padded_block_header_size + b * (padded_block_header_size + padded_comp_block_size);
     outputs[ss.first_block + b] = {dst + dst_offset, max_comp_blk_size};
     results[ss.first_block + b] = {0, compression_status::FAILURE};
   }
@@ -1234,7 +1234,7 @@ __global__ void __launch_bounds__(1024)
                        ? results[ss.first_block + b].bytes_written
                        : src_len;
       uint32_t blk_size24{};
-      if (results[ss.first_block + b].status == compression_status::SUCCESS) {
+      if (src_len < dst_len) {
         // Copy from uncompressed source
         src                                       = inputs[ss.first_block + b].data();
         results[ss.first_block + b].bytes_written = src_len;

From e29ea84bd213f8efec97ff86ac174bbe9f9e5a62 Mon Sep 17 00:00:00 2001
From: vuule <vmilovanovic@nvidia.com>
Date: Thu, 17 Nov 2022 18:25:23 -0800
Subject: [PATCH 187/202] style

---
 cpp/src/io/orc/stripe_enc.cu | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu
index 712dbf35674..a9a1c4ad861 100644
--- a/cpp/src/io/orc/stripe_enc.cu
+++ b/cpp/src/io/orc/stripe_enc.cu
@@ -1179,8 +1179,9 @@ __global__ void __launch_bounds__(256)
   num_blocks = (ss.stream_size > 0) ? (ss.stream_size - 1) / comp_blk_size + 1 : 1;
   for (uint32_t b = t; b < num_blocks; b += 256) {
     uint32_t blk_size = min(comp_blk_size, ss.stream_size - min(b * comp_blk_size, ss.stream_size));
-    inputs[ss.first_block + b]  = {src + b * comp_blk_size, blk_size};
-    auto const dst_offset       = padded_block_header_size + b * (padded_block_header_size + padded_comp_block_size);
+    inputs[ss.first_block + b] = {src + b * comp_blk_size, blk_size};
+    auto const dst_offset =
+      padded_block_header_size + b * (padded_block_header_size + padded_comp_block_size);
     outputs[ss.first_block + b] = {dst + dst_offset, max_comp_blk_size};
     results[ss.first_block + b] = {0, compression_status::FAILURE};
   }

From 3fb09d173d98a78fbda218d32de5bb8df1d1db0f Mon Sep 17 00:00:00 2001
From: Nghia Truong <nghiatruong.vn@gmail.com>
Date: Thu, 17 Nov 2022 18:35:40 -0800
Subject: [PATCH 188/202] Implement chunked Parquet reader (#11867)

This adds a chunked Parquet reader, which reads files iteratively. Instead of reading the input file all at once, we can read it chunk by chunk, where each chunk can be limited to be small enough not to exceed the cudf internal limits (2GB/2 billion rows):
```
auto reader = cudf::io::chunked_parquet_reader(byte_limit, read_opts);
do {
    auto const chunk = reader.read_chunk();
    // Process chunk
} while (reader.has_next());
```

Authors:
  - Nghia Truong (https://github.com/ttnghia)
  - https://github.com/nvdbaranec

Approvers:
  - Yunsong Wang (https://github.com/PointKernel)
  - Vukasin Milovanovic (https://github.com/vuule)

URL: https://github.com/rapidsai/cudf/pull/11867
---
 cpp/include/cudf/io/detail/parquet.hpp       |  56 ++
 cpp/include/cudf/io/parquet.hpp              |  68 ++
 cpp/src/io/functions.cpp                     |  39 +
 cpp/src/io/parquet/page_data.cu              | 510 +++++------
 cpp/src/io/parquet/page_hdr.cu               |   1 +
 cpp/src/io/parquet/parquet_gpu.hpp           |  67 +-
 cpp/src/io/parquet/reader.cpp                |  15 +
 cpp/src/io/parquet/reader_impl.cpp           | 162 ++--
 cpp/src/io/parquet/reader_impl.hpp           |  82 +-
 cpp/src/io/parquet/reader_impl_preprocess.cu | 749 +++++++++++++++-
 cpp/src/io/utilities/column_buffer.cpp       |  27 +
 cpp/src/io/utilities/column_buffer.hpp       |   6 +-
 cpp/tests/CMakeLists.txt                     |   2 +-
 cpp/tests/io/parquet_chunked_reader_test.cpp | 887 +++++++++++++++++++
 14 files changed, 2320 insertions(+), 351 deletions(-)
 create mode 100644 cpp/tests/io/parquet_chunked_reader_test.cpp

diff --git a/cpp/include/cudf/io/detail/parquet.hpp b/cpp/include/cudf/io/detail/parquet.hpp
index 8c7a7a21978..7f107017864 100644
--- a/cpp/include/cudf/io/detail/parquet.hpp
+++ b/cpp/include/cudf/io/detail/parquet.hpp
@@ -81,6 +81,62 @@ class reader {
   table_with_metadata read(parquet_reader_options const& options);
 };
 
+/**
+ * @brief The reader class that supports iterative reading of a given file.
+ *
+ * This class intentionally subclasses the `reader` class with private inheritance to hide the
+ * `reader::read()` API. As such, only chunked reading APIs are supported.
+ */
+class chunked_reader : private reader {
+ public:
+  /**
+   * @brief Constructor from a read size limit and an array of data sources with reader options.
+   *
+   * The typical usage should be similar to this:
+   * ```
+   *  do {
+   *    auto const chunk = reader.read_chunk();
+   *    // Process chunk
+   *  } while (reader.has_next());
+   *
+   * ```
+   *
+   * If `chunk_read_limit == 0` (i.e., no reading limit), a call to `read_chunk()` will read the
+   * whole file and return a table containing all rows.
+   *
+   * @param chunk_read_limit Limit on total number of bytes to be returned per read,
+   *        or `0` if there is no limit
+   * @param sources Input `datasource` objects to read the dataset from
+   * @param options Settings for controlling reading behavior
+   * @param stream CUDA stream used for device memory operations and kernel launches.
+   * @param mr Device memory resource to use for device memory allocation
+   */
+  explicit chunked_reader(std::size_t chunk_read_limit,
+                          std::vector<std::unique_ptr<cudf::io::datasource>>&& sources,
+                          parquet_reader_options const& options,
+                          rmm::cuda_stream_view stream,
+                          rmm::mr::device_memory_resource* mr);
+
+  /**
+   * @brief Destructor explicitly declared to avoid being inlined in the header.
+   *
+   * Since the declaration of the internal `_impl` object does not exist in this header, this
+   * destructor needs to be defined in a separate source file that has access to that object's
+   * declaration.
+   */
+  ~chunked_reader();
+
+  /**
+   * @copydoc cudf::io::chunked_parquet_reader::has_next
+   */
+  [[nodiscard]] bool has_next() const;
+
+  /**
+   * @copydoc cudf::io::chunked_parquet_reader::read_chunk
+   */
+  [[nodiscard]] table_with_metadata read_chunk() const;
+};
+
 /**
  * @brief Class to write parquet dataset data into columns.
  */
diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp
index c5425de308c..f3facae098d 100644
--- a/cpp/include/cudf/io/parquet.hpp
+++ b/cpp/include/cudf/io/parquet.hpp
@@ -399,6 +399,74 @@ table_with_metadata read_parquet(
   parquet_reader_options const& options,
   rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
 
+/**
+ * @brief The chunked parquet reader class to read a Parquet file iteratively into a series of
+ * tables, chunk by chunk.
+ *
+ * This class is designed to address the issue of reading very large Parquet files whose column
+ * sizes exceed the limit that can be stored in a cudf column. By reading the file content in
+ * chunks using this class, each chunk is guaranteed to have its size stay within the given
+ * limit.
+ */
+class chunked_parquet_reader {
+ public:
+  /**
+   * @brief Default constructor, this should never be used.
+   *
+   * This is added just to satisfy cython.
+   */
+  chunked_parquet_reader() = default;
+
+  /**
+   * @brief Constructor for chunked reader.
+   *
+   * This constructor requires the same `parquet_reader_options` parameter as in
+   * `cudf::read_parquet()`, and an additional parameter to specify the byte size limit of the
+   * output table for each read.
+   *
+   * @param chunk_read_limit Limit on total number of bytes to be returned per read,
+   *        or `0` if there is no limit
+   * @param options The options used to read Parquet file
+   * @param mr Device memory resource to use for device memory allocation
+   */
+  chunked_parquet_reader(
+    std::size_t chunk_read_limit,
+    parquet_reader_options const& options,
+    rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
+
+  /**
+   * @brief Destructor, destroying the internal reader instance.
+   *
+   * Since the declaration of the internal `reader` object does not exist in this header, this
+   * destructor needs to be defined in a separate source file that has access to that object's
+   * declaration.
+   */
+  ~chunked_parquet_reader();
+
+  /**
+   * @brief Check if there is any data in the given file that has not yet been read.
+   *
+   * @return A boolean value indicating if there is any data left to read
+   */
+  [[nodiscard]] bool has_next() const;
+
+  /**
+   * @brief Read a chunk of rows in the given Parquet file.
+   *
+   * The sequence of returned tables, if concatenated in order, is guaranteed to form the same
+   * complete dataset as reading the entire given file at once.
+   *
+   * An empty table will be returned if the given file is empty, or all the data in the file has
+   * been read and returned by the previous calls.
+   *
+   * @return An output `cudf::table` along with its metadata
+   */
+  [[nodiscard]] table_with_metadata read_chunk() const;
+
+ private:
+  std::unique_ptr<cudf::io::detail::parquet::chunked_reader> reader;
+};
+
 /** @} */  // end of group
 /**
  * @addtogroup io_writers
diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp
index c244a30dc75..1a5a43d2b90 100644
--- a/cpp/src/io/functions.cpp
+++ b/cpp/src/io/functions.cpp
@@ -501,6 +501,45 @@ std::unique_ptr<std::vector<uint8_t>> write_parquet(parquet_writer_options const
   return writer->close(options.get_column_chunks_file_paths());
 }
 
+/**
+ * @copydoc cudf::io::chunked_parquet_reader::chunked_parquet_reader
+ */
+chunked_parquet_reader::chunked_parquet_reader(std::size_t chunk_read_limit,
+                                               parquet_reader_options const& options,
+                                               rmm::mr::device_memory_resource* mr)
+  : reader{std::make_unique<detail_parquet::chunked_reader>(chunk_read_limit,
+                                                            make_datasources(options.get_source()),
+                                                            options,
+                                                            cudf::get_default_stream(),
+                                                            mr)}
+{
+}
+
+/**
+ * @copydoc cudf::io::chunked_parquet_reader::~chunked_parquet_reader
+ */
+chunked_parquet_reader::~chunked_parquet_reader() = default;
+
+/**
+ * @copydoc cudf::io::chunked_parquet_reader::has_next
+ */
+bool chunked_parquet_reader::has_next() const
+{
+  CUDF_FUNC_RANGE();
+  CUDF_EXPECTS(reader != nullptr, "Reader has not been constructed properly.");
+  return reader->has_next();
+}
+
+/**
+ * @copydoc cudf::io::chunked_parquet_reader::read_chunk
+ */
+table_with_metadata chunked_parquet_reader::read_chunk() const
+{
+  CUDF_FUNC_RANGE();
+  CUDF_EXPECTS(reader != nullptr, "Reader has not been constructed properly.");
+  return reader->read_chunk();
+}
+
 /**
  * @copydoc cudf::io::parquet_chunked_writer::parquet_chunked_writer
  */
diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu
index b36826002f4..c580aa5bbc0 100644
--- a/cpp/src/io/parquet/page_data.cu
+++ b/cpp/src/io/parquet/page_data.cu
@@ -18,8 +18,10 @@
 #include <io/utilities/block_utils.cuh>
 #include <io/utilities/column_buffer.hpp>
 
+#include <cuda/std/tuple>
 #include <cudf/detail/utilities/assert.cuh>
 #include <cudf/detail/utilities/hash_functions.cuh>
+#include <cudf/detail/utilities/integer_utils.hpp>
 #include <cudf/strings/string_view.hpp>
 #include <cudf/utilities/bit.hpp>
 
@@ -52,6 +54,8 @@ namespace io {
 namespace parquet {
 namespace gpu {
 
+namespace {
+
 struct page_state_s {
   const uint8_t* data_start;
   const uint8_t* data_end;
@@ -281,13 +285,18 @@ __device__ void gpuDecodeStream(
  * 31)
  * @param[in] t Warp1 thread ID (0..31)
  *
- * @return The new output position
+ * @return A pair containing the new output position, and the total length of strings decoded (this
+ * will only be valid on thread 0 and if sizes_only is true)
  */
-__device__ int gpuDecodeDictionaryIndices(volatile page_state_s* s, int target_pos, int t)
+template <bool sizes_only>
+__device__ cuda::std::pair<int, int> gpuDecodeDictionaryIndices(volatile page_state_s* s,
+                                                                int target_pos,
+                                                                int t)
 {
   const uint8_t* end = s->data_end;
   int dict_bits      = s->dict_bits;
   int pos            = s->dict_pos;
+  int str_len        = 0;
 
   while (pos < target_pos) {
     int is_literal, batch_len;
@@ -332,8 +341,11 @@ __device__ int gpuDecodeDictionaryIndices(volatile page_state_s* s, int target_p
     __syncwarp();
     is_literal = shuffle(is_literal);
     batch_len  = shuffle(batch_len);
+
+    // compute dictionary index.
+    int dict_idx = 0;
     if (t < batch_len) {
-      int dict_idx = s->dict_val;
+      dict_idx = s->dict_val;
       if (is_literal) {
         int32_t ofs      = (t - ((batch_len + 7) & ~7)) * dict_bits;
         const uint8_t* p = s->data_start + (ofs >> 3);
@@ -353,11 +365,36 @@ __device__ int gpuDecodeDictionaryIndices(volatile page_state_s* s, int target_p
           dict_idx &= (1 << dict_bits) - 1;
         }
       }
-      s->dict_idx[(pos + t) & (non_zero_buffer_size - 1)] = dict_idx;
+
+      // if we're not computing sizes, store off the dictionary index
+      if constexpr (!sizes_only) { s->dict_idx[(pos + t) & (non_zero_buffer_size - 1)] = dict_idx; }
+    }
+
+    // if we're computing sizes, add the length(s)
+    if constexpr (sizes_only) {
+      int const len = [&]() {
+        if (t >= batch_len) { return 0; }
+        // we may end up decoding more indices than we asked for. so don't include those in the
+        // size calculation
+        if (pos + t >= target_pos) { return 0; }
+        // TODO:  refactor this with gpuGetStringData / gpuGetStringSize
+        uint32_t const dict_pos = (s->dict_bits > 0) ? dict_idx * sizeof(string_index_pair) : 0;
+        if (target_pos && dict_pos < (uint32_t)s->dict_size) {
+          const auto* src = reinterpret_cast<const string_index_pair*>(s->dict_base + dict_pos);
+          return src->second;
+        }
+        return 0;
+      }();
+
+      using WarpReduce = cub::WarpReduce<size_type>;
+      __shared__ typename WarpReduce::TempStorage temp_storage;
+      // note: str_len will only be valid on thread 0.
+      str_len += WarpReduce(temp_storage).Sum(len);
     }
+
     pos += batch_len;
   }
-  return pos;
+  return {pos, str_len};
 }
 
 /**
@@ -424,17 +461,20 @@ __device__ int gpuDecodeRleBooleans(volatile page_state_s* s, int target_pos, in
 }
 
 /**
- * @brief Parses the length and position of strings
+ * @brief Parses the length and position of strings and returns total length of all strings
+ * processed
  *
  * @param[in,out] s Page state input/output
  * @param[in] target_pos Target output position
  * @param[in] t Thread ID
  *
- * @return The new output position
+ * @return Total length of strings processed
  */
-__device__ void gpuInitStringDescriptors(volatile page_state_s* s, int target_pos, int t)
+__device__ size_type gpuInitStringDescriptors(volatile page_state_s* s, int target_pos, int t)
 {
-  int pos = s->dict_pos;
+  int pos       = s->dict_pos;
+  int total_len = 0;
+
   // This step is purely serial
   if (!t) {
     const uint8_t* cur = s->data_start;
@@ -453,21 +493,26 @@ __device__ void gpuInitStringDescriptors(volatile page_state_s* s, int target_po
       s->dict_idx[pos & (non_zero_buffer_size - 1)] = k;
       s->str_len[pos & (non_zero_buffer_size - 1)]  = len;
       k += len;
+      total_len += len;
       pos++;
     }
     s->dict_val = k;
     __threadfence_block();
   }
+
+  return total_len;
 }
 
 /**
- * @brief Output a string descriptor
+ * @brief Retrieves string information for a string at the specified source position
  *
- * @param[in,out] s Page state input/output
+ * @param[in] s Page state input
  * @param[in] src_pos Source position
- * @param[in] dstv Pointer to row output data (string descriptor or 32-bit hash)
+ *
+ * @return A pair containing a pointer to the string and its length
  */
-inline __device__ void gpuOutputString(volatile page_state_s* s, int src_pos, void* dstv)
+inline __device__ cuda::std::pair<const char*, size_t> gpuGetStringData(volatile page_state_s* s,
+                                                                        int src_pos)
 {
   const char* ptr = nullptr;
   size_t len      = 0;
@@ -490,6 +535,20 @@ inline __device__ void gpuOutputString(volatile page_state_s* s, int src_pos, vo
       len = s->str_len[src_pos & (non_zero_buffer_size - 1)];
     }
   }
+
+  return {ptr, len};
+}
+
+/**
+ * @brief Output a string descriptor
+ *
+ * @param[in,out] s Page state input/output
+ * @param[in] src_pos Source position
+ * @param[in] dstv Pointer to row output data (string descriptor or 32-bit hash)
+ */
+inline __device__ void gpuOutputString(volatile page_state_s* s, int src_pos, void* dstv)
+{
+  auto [ptr, len] = gpuGetStringData(s, src_pos);
   if (s->dtype_len == 4) {
     // Output hash. This hash value is used if the option to convert strings to
     // categoricals is enabled. The seed value is chosen arbitrarily.
@@ -818,14 +877,17 @@ static __device__ void gpuOutputGeneric(volatile page_state_s* s,
  * @param[in, out] s The local page state to be filled in
  * @param[in] p The global page to be copied from
  * @param[in] chunks The global list of chunks
- * @param[in] num_rows Maximum number of rows to read
  * @param[in] min_row Crop all rows below min_row
+ * @param[in] num_rows Maximum number of rows to read
+ * @param[in] is_decode_step If we are setting up for the decode step (instead of the preprocess
+ * step)
  */
 static __device__ bool setupLocalPageInfo(page_state_s* const s,
                                           PageInfo const* p,
                                           device_span<ColumnChunkDesc const> chunks,
                                           size_t min_row,
-                                          size_t num_rows)
+                                          size_t num_rows,
+                                          bool is_decode_step)
 {
   int t = threadIdx.x;
   int chunk_idx;
@@ -926,17 +988,25 @@ static __device__ bool setupLocalPageInfo(page_state_s* const s,
         s->dtype_len = 8;  // Convert to 64-bit timestamp
       }
 
-      // first row within the page to output
-      if (page_start_row >= min_row) {
-        s->first_row = 0;
-      } else {
-        s->first_row = (int32_t)min(min_row - page_start_row, (size_t)s->page.num_rows);
-      }
-      // # of rows within the page to output
-      s->num_rows = s->page.num_rows;
-      if ((page_start_row + s->first_row) + s->num_rows > min_row + num_rows) {
-        s->num_rows =
-          (int32_t)max((int64_t)(min_row + num_rows - (page_start_row + s->first_row)), INT64_C(0));
+      // NOTE: s->page.num_rows, s->col.chunk_row, s->first_row and s->num_rows will be
+      // invalid/bogus during first pass of the preprocess step for nested types. this is ok
+      // because we ignore these values in that stage.
+      {
+        auto const max_row = min_row + num_rows;
+
+        // if we are totally outside the range of the input, do nothing
+        if ((page_start_row > max_row) || (page_start_row + s->page.num_rows < min_row)) {
+          s->first_row = 0;
+          s->num_rows  = 0;
+        }
+        // otherwise
+        else {
+          s->first_row             = page_start_row >= min_row ? 0 : min_row - page_start_row;
+          auto const max_page_rows = s->page.num_rows - s->first_row;
+          s->num_rows              = (page_start_row + s->first_row) + max_page_rows <= max_row
+                                       ? max_page_rows
+                                       : max_row - (page_start_row + s->first_row);
+        }
       }
 
       // during the decoding step we need to offset the global output buffers
@@ -944,7 +1014,11 @@ static __device__ bool setupLocalPageInfo(page_state_s* const s,
       // is responsible for.
       // - for flat schemas, we can do this directly by using row counts
       // - for nested schemas, these offsets are computed during the preprocess step
-      if (s->col.column_data_base != nullptr) {
+      //
+      // NOTE: in a chunked read situation, s->col.column_data_base and s->col.valid_map_base
+      // will be aliased to memory that has been freed when we get here in the non-decode step, so
+      // we cannot check against nullptr.  we'll just check a flag directly.
+      if (is_decode_step) {
         int max_depth = s->col.max_nesting_depth;
         for (int idx = 0; idx < max_depth; idx++) {
           PageNestingInfo* pni = &s->page.nesting[idx];
@@ -954,12 +1028,13 @@ static __device__ bool setupLocalPageInfo(page_state_s* const s,
           if (s->col.max_level[level_type::REPETITION] == 0) {
             output_offset = page_start_row >= min_row ? page_start_row - min_row : 0;
           }
-          // for schemas with lists, we've already got the exactly value precomputed
+          // for schemas with lists, we've already got the exact value precomputed
           else {
             output_offset = pni->page_start_value;
           }
 
           pni->data_out = static_cast<uint8_t*>(s->col.column_data_base[idx]);
+
           if (pni->data_out != nullptr) {
             // anything below max depth with a valid data pointer must be a list, so the
             // element size is the size of the offset type.
@@ -1036,6 +1111,7 @@ static __device__ bool setupLocalPageInfo(page_state_s* const s,
       s->page.skipped_leaf_values = 0;
       s->input_value_count        = 0;
       s->input_row_count          = 0;
+      s->input_leaf_count         = 0;
 
       s->row_index_lower_bound = -1;
     }
@@ -1064,13 +1140,14 @@ static __device__ bool setupLocalPageInfo(page_state_s* const s,
 
       // if we're in the decoding step, jump directly to the first
       // value we care about
-      if (s->col.column_data_base != nullptr) {
+      if (is_decode_step) {
         s->input_value_count = s->page.skipped_values > -1 ? s->page.skipped_values : 0;
       } else {
-        s->input_value_count        = 0;
-        s->input_leaf_count         = 0;
-        s->page.skipped_values      = -1;
-        s->page.skipped_leaf_values = -1;
+        s->input_value_count = 0;
+        s->input_leaf_count  = 0;
+        s->page.skipped_values =
+          -1;  // magic number to indicate it hasn't been set for use inside UpdatePageSizes
+        s->page.skipped_leaf_values = 0;
       }
     }
 
@@ -1397,7 +1474,7 @@ static __device__ void gpuUpdatePageSizes(page_state_s* s,
                                           bool bounds_set)
 {
   // max nesting depth of the column
-  int max_depth = s->col.max_nesting_depth;
+  int const max_depth = s->col.max_nesting_depth;
   // how many input level values we've processed in the page so far
   int input_value_count = s->input_value_count;
   // how many leaf values we've processed in the page so far
@@ -1411,11 +1488,10 @@ static __device__ void gpuUpdatePageSizes(page_state_s* s,
       start_depth, end_depth, d, s, input_value_count, target_input_value_count, t);
 
     // count rows and leaf values
-    int is_new_row                = start_depth == 0 ? 1 : 0;
-    uint32_t warp_row_count_mask  = ballot(is_new_row);
-    int is_new_leaf               = (d >= s->page.nesting[max_depth - 1].max_def_level) ? 1 : 0;
-    uint32_t warp_leaf_count_mask = ballot(is_new_leaf);
-
+    int const is_new_row               = start_depth == 0 ? 1 : 0;
+    uint32_t const warp_row_count_mask = ballot(is_new_row);
+    int const is_new_leaf = (d >= s->page.nesting[max_depth - 1].max_def_level) ? 1 : 0;
+    uint32_t const warp_leaf_count_mask = ballot(is_new_leaf);
     // is this thread within row bounds? on the first pass we don't know the bounds, so we will be
     // computing the full size of the column.  on the second pass, we will know our actual row
     // bounds, so the computation will cap sizes properly.
@@ -1429,8 +1505,8 @@ static __device__ void gpuUpdatePageSizes(page_state_s* s,
                         ? 1
                         : 0;
 
-      uint32_t row_bounds_mask  = ballot(in_row_bounds);
-      int first_thread_in_range = __ffs(row_bounds_mask) - 1;
+      uint32_t const row_bounds_mask  = ballot(in_row_bounds);
+      int const first_thread_in_range = __ffs(row_bounds_mask) - 1;
 
       // if we've found the beginning of the first row, mark down the position
       // in the def/repetition buffer (skipped_values) and the data buffer (skipped_leaf_values)
@@ -1443,13 +1519,15 @@ static __device__ void gpuUpdatePageSizes(page_state_s* s,
       }
     }
 
-    // increment counts across all nesting depths
+    // increment value counts across all nesting depths
     for (int s_idx = 0; s_idx < max_depth; s_idx++) {
-      // if we are within the range of nesting levels we should be adding value indices for
-      int in_nesting_bounds = (s_idx >= start_depth && s_idx <= end_depth && in_row_bounds) ? 1 : 0;
+      PageNestingInfo* pni = &s->page.nesting[s_idx];
 
-      uint32_t count_mask = ballot(in_nesting_bounds);
-      if (!t) { s->page.nesting[s_idx].size += __popc(count_mask); }
+      // if we are within the range of nesting levels we should be adding value indices for
+      int const in_nesting_bounds =
+        (s_idx >= start_depth && s_idx <= end_depth && in_row_bounds) ? 1 : 0;
+      uint32_t const count_mask = ballot(in_nesting_bounds);
+      if (!t) { pni->batch_size += __popc(count_mask); }
     }
 
     input_value_count += min(32, (target_input_value_count - input_value_count));
@@ -1465,6 +1543,21 @@ static __device__ void gpuUpdatePageSizes(page_state_s* s,
   }
 }
 
+__device__ size_type gpuGetStringSize(page_state_s* s, int target_count, int t)
+{
+  auto dict_target_pos = target_count;
+  size_type str_len    = 0;
+  if (s->dict_base) {
+    auto const [new_target_pos, len] = gpuDecodeDictionaryIndices<true>(s, target_count, t);
+    dict_target_pos                  = new_target_pos;
+    str_len                          = len;
+  } else if ((s->col.data_type & 7) == BYTE_ARRAY) {
+    str_len = gpuInitStringDescriptors(s, target_count, t);
+  }
+  if (!t) { *(volatile int32_t*)&s->dict_pos = dict_target_pos; }
+  return str_len;
+}
+
 /**
  * @brief Kernel for computing per-page column size information for all nesting levels.
  *
@@ -1473,17 +1566,20 @@ static __device__ void gpuUpdatePageSizes(page_state_s* s,
  * @param pages List of pages
  * @param chunks List of column chunks
  * @param min_row Row index to start reading at
- * @param num_rows Maximum number of rows to read. Pass as INT_MAX to guarantee reading all rows.
- * @param trim_pass Whether or not this is the trim pass.  We first have to compute
+ * @param num_rows Maximum number of rows to read. Pass as INT_MAX to guarantee reading all rows
+ * @param is_base_pass Whether or not this is the base pass.  We first have to compute
  * the full size information of every page before we come through in a second (trim) pass
- * to determine what subset of rows in this page we should be reading.
+ * to determine what subset of rows in this page we should be reading
+ * @param compute_string_sizes Whether or not we should be computing string sizes
+ * (PageInfo::str_bytes) as part of the pass
  */
 __global__ void __launch_bounds__(block_size)
   gpuComputePageSizes(PageInfo* pages,
                       device_span<ColumnChunkDesc const> chunks,
                       size_t min_row,
                       size_t num_rows,
-                      bool trim_pass)
+                      bool is_base_pass,
+                      bool compute_string_sizes)
 {
   __shared__ __align__(16) page_state_s state_g;
 
@@ -1492,34 +1588,81 @@ __global__ void __launch_bounds__(block_size)
   int t                 = threadIdx.x;
   PageInfo* pp          = &pages[page_idx];
 
+  if (!setupLocalPageInfo(s, pp, chunks, min_row, num_rows, false)) { return; }
+
+  if (!t) {
+    s->page.skipped_values      = -1;
+    s->page.skipped_leaf_values = 0;
+    s->page.str_bytes           = 0;
+    s->input_row_count          = 0;
+    s->input_value_count        = 0;
+
+    // in the base pass, we're computing the number of rows, make sure we visit absolutely
+    // everything
+    if (is_base_pass) {
+      s->first_row             = 0;
+      s->num_rows              = INT_MAX;
+      s->row_index_lower_bound = -1;
+    }
+  }
+
   // we only need to preprocess hierarchies with repetition in them (ie, hierarchies
   // containing lists anywhere within).
   bool const has_repetition = chunks[pp->chunk_idx].max_level[level_type::REPETITION] > 0;
-  if (!has_repetition) { return; }
+  compute_string_sizes =
+    compute_string_sizes && ((s->col.data_type & 7) == BYTE_ARRAY && s->dtype_len != 4);
+
+  // various early out optimizations:
+
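+  // - if this is a flat hierarchy (no lists) and we are not computing string sizes for it, we
+  //   don't need to do the expensive work of traversing the level data to determine sizes.
+  //   we can just set them directly from the page's num_input_values (the full size is only
+  //   written during the base pass, the batch size is written on every pass).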
+  if (!has_repetition && !compute_string_sizes) {
+    int d = 0;
+    while (d < s->page.num_nesting_levels) {
+      auto const i = d + t;
+      if (i < s->page.num_nesting_levels) {
+        if (is_base_pass) { pp->nesting[i].size = pp->num_input_values; }
+        pp->nesting[i].batch_size = pp->num_input_values;
+      }
+      d += blockDim.x;
+    }
+    return;
+  }
+
+  // - if this page is not at the beginning or end of the trim bounds, the batch size is
+  //   the full page size
+  if (!is_base_pass && s->num_rows == s->page.num_rows) {
+    int d = 0;
+    while (d < s->page.num_nesting_levels) {
+      auto const i = d + t;
+      if (i < s->page.num_nesting_levels) { pp->nesting[i].batch_size = pp->nesting[i].size; }
+      d += blockDim.x;
+    }
+    return;
+  }
 
-  if (!setupLocalPageInfo(s, pp, chunks, trim_pass ? min_row : 0, trim_pass ? num_rows : INT_MAX)) {
+  // - if this page is completely trimmed, zero out sizes.
+  if (!is_base_pass && s->num_rows == 0) {
+    int d = 0;
+    while (d < s->page.num_nesting_levels) {
+      auto const i = d + t;
+      if (i < s->page.num_nesting_levels) { pp->nesting[i].batch_size = 0; }
+      d += blockDim.x;
+    }
     return;
   }
 
+  // at this point we are going to be fully recomputing batch information
+
   // zero sizes
   int d = 0;
   while (d < s->page.num_nesting_levels) {
-    if (d + t < s->page.num_nesting_levels) { s->page.nesting[d + t].size = 0; }
+    if (d + t < s->page.num_nesting_levels) { s->page.nesting[d + t].batch_size = 0; }
     d += blockDim.x;
   }
-  if (!t) {
-    s->page.skipped_values      = -1;
-    s->page.skipped_leaf_values = -1;
-    s->input_row_count          = 0;
-    s->input_value_count        = 0;
 
-    // if this isn't the trim pass, make sure we visit absolutely everything
-    if (!trim_pass) {
-      s->first_row             = 0;
-      s->num_rows              = INT_MAX;
-      s->row_index_lower_bound = -1;
-    }
-  }
   __syncthreads();
 
   // optimization : it might be useful to have a version of gpuDecodeStream that could go wider than
@@ -1532,25 +1675,51 @@ __global__ void __launch_bounds__(block_size)
     while (!s->error && s->input_value_count < s->num_input_values) {
       // decode repetition and definition levels. these will attempt to decode at
       // least up to the target, but may decode a few more.
-      gpuDecodeStream(s->rep, s, target_input_count, t, level_type::REPETITION);
+      if (has_repetition) {
+        gpuDecodeStream(s->rep, s, target_input_count, t, level_type::REPETITION);
+      }
       gpuDecodeStream(s->def, s, target_input_count, t, level_type::DEFINITION);
       __syncwarp();
 
       // we may have decoded different amounts from each stream, so only process what we've been
-      int actual_input_count =
-        min(s->lvl_count[level_type::REPETITION], s->lvl_count[level_type::DEFINITION]);
+      int actual_input_count = has_repetition ? min(s->lvl_count[level_type::REPETITION],
+                                                    s->lvl_count[level_type::DEFINITION])
+                                              : s->lvl_count[level_type::DEFINITION];
 
       // process what we got back
-      gpuUpdatePageSizes(s, actual_input_count, t, trim_pass);
+      gpuUpdatePageSizes(s, actual_input_count, t, !is_base_pass);
+      if (compute_string_sizes) {
+        auto const str_len = gpuGetStringSize(s, s->input_leaf_count, t);
+        if (!t) { s->page.str_bytes += str_len; }
+      }
+
       target_input_count = actual_input_count + batch_size;
       __syncwarp();
     }
   }
-  // update # rows in the actual page
+
+  // update output results:
+  // - real number of rows for the whole page
+  // - nesting sizes for the whole page
+  // - skipped value information for trimmed pages
+  // - string bytes
+  if (is_base_pass) {
+    // nesting level 0 is the root column, so the size is also the # of rows
+    if (!t) { pp->num_rows = s->page.nesting[0].batch_size; }
+
+    // store off this batch size as the "full" size
+    int d = 0;
+    while (d < s->page.num_nesting_levels) {
+      auto const i = d + t;
+      if (i < s->page.num_nesting_levels) { pp->nesting[i].size = pp->nesting[i].batch_size; }
+      d += blockDim.x;
+    }
+  }
+
   if (!t) {
-    pp->num_rows            = s->page.nesting[0].size;
     pp->skipped_values      = s->page.skipped_values;
     pp->skipped_leaf_values = s->page.skipped_leaf_values;
+    pp->str_bytes           = s->page.str_bytes;
   }
 }
 
@@ -1577,7 +1746,10 @@ __global__ void __launch_bounds__(block_size) gpuDecodePageData(
   int t                 = threadIdx.x;
   int out_thread0;
 
-  if (!setupLocalPageInfo(s, &pages[page_idx], chunks, min_row, num_rows)) { return; }
+  if (!setupLocalPageInfo(s, &pages[page_idx], chunks, min_row, num_rows, true)) { return; }
+
+  // if we have no rows to do (eg, in a skip_rows/num_rows case)
+  if (s->num_rows == 0) { return; }
 
   if (s->dict_base) {
     out_thread0 = (s->dict_bits > 0) ? 64 : 32;
@@ -1614,7 +1786,7 @@ __global__ void __launch_bounds__(block_size) gpuDecodePageData(
 
       // WARP1: Decode dictionary indices, booleans or string positions
       if (s->dict_base) {
-        src_target_pos = gpuDecodeDictionaryIndices(s, src_target_pos, t & 0x1f);
+        src_target_pos = gpuDecodeDictionaryIndices<false>(s, src_target_pos, t & 0x1f).first;
       } else if ((s->col.data_type & 7) == BOOLEAN) {
         src_target_pos = gpuDecodeRleBooleans(s, src_target_pos, t & 0x1f);
       } else if ((s->col.data_type & 7) == BYTE_ARRAY) {
@@ -1701,71 +1873,18 @@ __global__ void __launch_bounds__(block_size) gpuDecodePageData(
   }
 }
 
-struct chunk_row_output_iter {
-  PageInfo* p;
-  using value_type        = size_type;
-  using difference_type   = size_type;
-  using pointer           = size_type*;
-  using reference         = size_type&;
-  using iterator_category = thrust::output_device_iterator_tag;
-
-  __host__ __device__ chunk_row_output_iter operator+(int i)
-  {
-    return chunk_row_output_iter{p + i};
-  }
-
-  __host__ __device__ void operator++() { p++; }
-
-  __device__ reference operator[](int i) { return p[i].chunk_row; }
-  __device__ reference operator*() { return p->chunk_row; }
-  __device__ void operator=(value_type v) { p->chunk_row = v; }
-};
-
-struct start_offset_output_iterator {
-  PageInfo* pages;
-  int* page_indices;
-  int cur_index;
-  int src_col_schema;
-  int nesting_depth;
-  int empty               = 0;
-  using value_type        = size_type;
-  using difference_type   = size_type;
-  using pointer           = size_type*;
-  using reference         = size_type&;
-  using iterator_category = thrust::output_device_iterator_tag;
-
-  __host__ __device__ start_offset_output_iterator operator+(int i)
-  {
-    return start_offset_output_iterator{
-      pages, page_indices, cur_index + i, src_col_schema, nesting_depth};
-  }
-
-  __host__ __device__ void operator++() { cur_index++; }
-
-  __device__ reference operator[](int i) { return dereference(cur_index + i); }
-  __device__ reference operator*() { return dereference(cur_index); }
-
- private:
-  __device__ reference dereference(int index)
-  {
-    PageInfo const& p = pages[page_indices[index]];
-    if (p.src_col_schema != src_col_schema || p.flags & PAGEINFO_FLAGS_DICTIONARY) { return empty; }
-    return p.nesting[nesting_depth].page_start_value;
-  }
-};
+}  // anonymous namespace
 
 /**
- * @copydoc cudf::io::parquet::gpu::PreprocessColumnData
+ * @copydoc cudf::io::parquet::gpu::ComputePageSizes
  */
-void PreprocessColumnData(hostdevice_vector<PageInfo>& pages,
-                          hostdevice_vector<ColumnChunkDesc> const& chunks,
-                          std::vector<input_column_info>& input_columns,
-                          std::vector<cudf::io::detail::column_buffer>& output_columns,
-                          size_t num_rows,
-                          size_t min_row,
-                          bool uses_custom_row_bounds,
-                          rmm::cuda_stream_view stream,
-                          rmm::mr::device_memory_resource* mr)
+void ComputePageSizes(hostdevice_vector<PageInfo>& pages,
+                      hostdevice_vector<ColumnChunkDesc> const& chunks,
+                      size_t min_row,
+                      size_t num_rows,
+                      bool compute_num_rows,
+                      bool compute_string_sizes,
+                      rmm::cuda_stream_view stream)
 {
   dim3 dim_block(block_size, 1);
   dim3 dim_grid(pages.size(), 1);  // 1 threadblock per page
@@ -1776,124 +1895,7 @@ void PreprocessColumnData(hostdevice_vector<PageInfo>& pages,
   // If uses_custom_row_bounds is set to true, we have to do a second pass later that "trims"
   // the starting and ending read values to account for these bounds.
   gpuComputePageSizes<<<dim_grid, dim_block, 0, stream.value()>>>(
-    pages.device_ptr(),
-    chunks,
-    // if uses_custom_row_bounds is false, include all possible rows.
-    uses_custom_row_bounds ? min_row : 0,
-    uses_custom_row_bounds ? num_rows : INT_MAX,
-    !uses_custom_row_bounds);
-
-  // computes:
-  // PageInfo::chunk_row for all pages
-  // Note: this is doing some redundant work for pages in flat hierarchies.  chunk_row has already
-  // been computed during header decoding. the overall amount of work here is very small though.
-  auto key_input = thrust::make_transform_iterator(
-    pages.device_ptr(), [] __device__(PageInfo const& page) { return page.chunk_idx; });
-  auto page_input = thrust::make_transform_iterator(
-    pages.device_ptr(), [] __device__(PageInfo const& page) { return page.num_rows; });
-  thrust::exclusive_scan_by_key(rmm::exec_policy(stream),
-                                key_input,
-                                key_input + pages.size(),
-                                page_input,
-                                chunk_row_output_iter{pages.device_ptr()});
-
-  // computes:
-  // PageNestingInfo::size for each level of nesting, for each page, taking row bounds into account.
-  // PageInfo::skipped_values, which tells us where to start decoding in the input  .
-  // It is only necessary to do this second pass if uses_custom_row_bounds is set (if the user has
-  // specified artifical bounds).
-  if (uses_custom_row_bounds) {
-    gpuComputePageSizes<<<dim_grid, dim_block, 0, stream.value()>>>(
-      pages.device_ptr(), chunks, min_row, num_rows, true);
-  }
-
-  // ordering of pages is by input column schema, repeated across row groups.  so
-  // if we had 3 columns, each with 2 pages, and 1 row group, our schema values might look like
-  //
-  // 1, 1, 2, 2, 3, 3
-  //
-  // However, if we had more than one row group, the pattern would be
-  //
-  // 1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3
-  // ^ row group 0     |
-  //                   ^ row group 1
-  //
-  // To use exclusive_scan_by_key, the ordering we actually want is
-  //
-  // 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3
-  //
-  // We also need to preserve key-relative page ordering, so we need to use a stable sort.
-  rmm::device_uvector<int> page_keys(pages.size(), stream);
-  rmm::device_uvector<int> page_index(pages.size(), stream);
-  {
-    thrust::transform(rmm::exec_policy(stream),
-                      pages.device_ptr(),
-                      pages.device_ptr() + pages.size(),
-                      page_keys.begin(),
-                      [] __device__(PageInfo const& page) { return page.src_col_schema; });
-
-    thrust::sequence(rmm::exec_policy(stream), page_index.begin(), page_index.end());
-    thrust::stable_sort_by_key(rmm::exec_policy(stream),
-                               page_keys.begin(),
-                               page_keys.end(),
-                               page_index.begin(),
-                               thrust::less<int>());
-  }
-
-  // compute output column sizes by examining the pages of the -input- columns
-  for (size_t idx = 0; idx < input_columns.size(); idx++) {
-    auto const& input_col = input_columns[idx];
-    auto src_col_schema   = input_col.schema_idx;
-    size_t max_depth      = input_col.nesting_depth();
-
-    auto* cols = &output_columns;
-    for (size_t l_idx = 0; l_idx < input_col.nesting_depth(); l_idx++) {
-      auto& out_buf = (*cols)[input_col.nesting[l_idx]];
-      cols          = &out_buf.children;
-
-      // size iterator. indexes pages by sorted order
-      auto size_input = thrust::make_transform_iterator(
-        page_index.begin(),
-        [src_col_schema, l_idx, pages = pages.device_ptr()] __device__(int index) {
-          auto const& page = pages[index];
-          if (page.src_col_schema != src_col_schema || page.flags & PAGEINFO_FLAGS_DICTIONARY) {
-            return 0;
-          }
-          return page.nesting[l_idx].size;
-        });
-
-      // if this buffer is part of a list hierarchy, we need to determine it's
-      // final size and allocate it here.
-      //
-      // for struct columns, higher levels of the output columns are shared between input
-      // columns. so don't compute any given level more than once.
-      if ((out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) && out_buf.size == 0) {
-        int size = thrust::reduce(rmm::exec_policy(stream), size_input, size_input + pages.size());
-
-        // if this is a list column add 1 for non-leaf levels for the terminating offset
-        if (out_buf.type.id() == type_id::LIST && l_idx < max_depth) { size++; }
-
-        // allocate
-        out_buf.create(size, stream, mr);
-      }
-
-      // for nested hierarchies, compute per-page start offset
-      if (input_col.has_repetition) {
-        thrust::exclusive_scan_by_key(rmm::exec_policy(stream),
-                                      page_keys.begin(),
-                                      page_keys.end(),
-                                      size_input,
-                                      start_offset_output_iterator{pages.device_ptr(),
-                                                                   page_index.begin(),
-                                                                   0,
-                                                                   static_cast<int>(src_col_schema),
-                                                                   static_cast<int>(l_idx)});
-      }
-    }
-  }
-
-  // retrieve pages back
-  pages.device_to_host(stream);
+    pages.device_ptr(), chunks, min_row, num_rows, compute_num_rows, compute_string_sizes);
 }
 
 /**
@@ -1905,6 +1907,8 @@ void __host__ DecodePageData(hostdevice_vector<PageInfo>& pages,
                              size_t min_row,
                              rmm::cuda_stream_view stream)
 {
+  CUDF_EXPECTS(pages.size() > 0, "There is no page to decode");
+
   dim3 dim_block(block_size, 1);
   dim3 dim_grid(pages.size(), 1);  // 1 threadblock per page
 
diff --git a/cpp/src/io/parquet/page_hdr.cu b/cpp/src/io/parquet/page_hdr.cu
index 19922bf7022..ffb4cb60a20 100644
--- a/cpp/src/io/parquet/page_hdr.cu
+++ b/cpp/src/io/parquet/page_hdr.cu
@@ -367,6 +367,7 @@ __global__ void __launch_bounds__(128)
       // definition levels
       bs->page.chunk_row = 0;
       bs->page.num_rows  = 0;
+      bs->page.str_bytes = 0;
     }
     num_values     = bs->ck.num_values;
     page_info      = bs->ck.page_info;
diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp
index 7849e05eb68..ccf4b056ae8 100644
--- a/cpp/src/io/parquet/parquet_gpu.hpp
+++ b/cpp/src/io/parquet/parquet_gpu.hpp
@@ -95,10 +95,13 @@ struct PageNestingInfo {
   // set at initialization
   int32_t max_def_level;
   int32_t max_rep_level;
+  cudf::type_id type;  // type of the corresponding cudf output column
+  bool nullable;
 
   // set during preprocessing
   int32_t size;  // this page/nesting-level's row count contribution to the output column, if fully
                  // decoded
+  int32_t batch_size;        // the size of the page for this batch
   int32_t page_start_value;  // absolute output start index in output column data
 
   // set during data decoding
@@ -152,6 +155,9 @@ struct PageInfo {
   int skipped_values;
   // # of values skipped in the actual data stream.
   int skipped_leaf_values;
+  // for string columns only, the size of all the chars in the string for
+  // this page. only valid/computed during the base preprocess pass
+  int32_t str_bytes;
 
   // nesting information (input/output) for each page
   int num_nesting_levels;
@@ -238,7 +244,7 @@ struct ColumnChunkDesc {
 };
 
 /**
- * @brief The struct to store raw/intermediate file data before parsing.
+ * @brief Struct to store raw/intermediate file data before parsing.
  */
 struct file_intermediate_data {
   std::vector<std::unique_ptr<datasource::buffer>> raw_page_data;
@@ -248,6 +254,23 @@ struct file_intermediate_data {
   hostdevice_vector<gpu::PageNestingInfo> page_nesting_info{};
 };
 
+/**
+ * @brief Struct to store intermediate page data for parsing each chunk of rows in chunked reading.
+ */
+struct chunk_intermediate_data {
+  rmm::device_uvector<int32_t> page_keys{0, rmm::cuda_stream_default};
+  rmm::device_uvector<int32_t> page_index{0, rmm::cuda_stream_default};
+  rmm::device_uvector<string_index_pair> str_dict_index{0, rmm::cuda_stream_default};
+};
+
+/**
+ * @brief Struct to identify the reading row range for each chunk of rows in chunked reading.
+ */
+struct chunk_read_info {
+  size_t skip_rows;
+  size_t num_rows;
+};
+
 /**
  * @brief Struct describing an encoder column
  */
@@ -378,35 +401,35 @@ void BuildStringDictionaryIndex(ColumnChunkDesc* chunks,
                                 rmm::cuda_stream_view stream);
 
 /**
- * @brief Preprocess column information for nested schemas.
+ * @brief Compute page output size information.
  *
- * There are several pieces of information we can't compute directly from row counts in
- * the parquet headers when dealing with nested schemas.
- * - The total sizes of all output columns at all nesting levels
- * - The starting output buffer offset for each page, for each nesting level
- * For flat schemas, these values are computed during header decoding (see gpuDecodePageHeaders)
+ * When dealing with nested hierarchies (those that contain lists), or when doing a chunked
+ * read, we need to obtain more information up front than we have with just the row counts.
+ *
+ * - We need to determine the sizes of each output cudf column per page
+ * - We need to determine information about where to start decoding the value stream
+ *   if we are using custom user bounds (skip_rows / num_rows)
+ * - We need to determine actual number of top level rows per page
+ * - If we are doing a chunked read, we need to determine the total string size per page
  *
- * Note : this function is where output device memory is allocated for nested columns.
  *
  * @param pages All pages to be decoded
  * @param chunks All chunks to be decoded
- * @param input_columns Input column information
- * @param output_columns Output column information
  * @param num_rows Maximum number of rows to read
  * @param min_rows crop all rows below min_row
- * @param uses_custom_row_bounds Whether or not num_rows and min_rows represents user-specific
- * bounds
- * @param stream Cuda stream
+ * @param compute_num_rows If set to true, the num_rows field in PageInfo will be
+ * computed
+ * @param compute_string_sizes If set to true, the str_bytes field in PageInfo will
+ * be computed
+ * @param stream CUDA stream to use
  */
-void PreprocessColumnData(hostdevice_vector<PageInfo>& pages,
-                          hostdevice_vector<ColumnChunkDesc> const& chunks,
-                          std::vector<input_column_info>& input_columns,
-                          std::vector<cudf::io::detail::column_buffer>& output_columns,
-                          size_t num_rows,
-                          size_t min_row,
-                          bool uses_custom_row_bounds,
-                          rmm::cuda_stream_view stream,
-                          rmm::mr::device_memory_resource* mr);
+void ComputePageSizes(hostdevice_vector<PageInfo>& pages,
+                      hostdevice_vector<ColumnChunkDesc> const& chunks,
+                      size_t num_rows,
+                      size_t min_row,
+                      bool compute_num_rows,
+                      bool compute_string_sizes,
+                      rmm::cuda_stream_view stream);
 
 /**
  * @brief Launches kernel for reading the column data stored in the pages
diff --git a/cpp/src/io/parquet/reader.cpp b/cpp/src/io/parquet/reader.cpp
index 6be6987b7cb..1321e8073d7 100644
--- a/cpp/src/io/parquet/reader.cpp
+++ b/cpp/src/io/parquet/reader.cpp
@@ -40,4 +40,19 @@ table_with_metadata reader::read(parquet_reader_options const& options)
                      options.get_row_groups());
 }
 
+chunked_reader::chunked_reader(std::size_t chunk_read_limit,
+                               std::vector<std::unique_ptr<datasource>>&& sources,
+                               parquet_reader_options const& options,
+                               rmm::cuda_stream_view stream,
+                               rmm::mr::device_memory_resource* mr)
+{
+  _impl = std::make_unique<impl>(chunk_read_limit, std::move(sources), options, stream, mr);
+}
+
+chunked_reader::~chunked_reader() = default;
+
+bool chunked_reader::has_next() const { return _impl->has_next(); }
+
+table_with_metadata chunked_reader::read_chunk() const { return _impl->read_chunk(); }
+
 }  // namespace cudf::io::detail::parquet
diff --git a/cpp/src/io/parquet/reader_impl.cpp b/cpp/src/io/parquet/reader_impl.cpp
index a61f63f6645..84d8cfc273f 100644
--- a/cpp/src/io/parquet/reader_impl.cpp
+++ b/cpp/src/io/parquet/reader_impl.cpp
@@ -28,22 +28,8 @@ void reader::impl::decode_page_data(size_t skip_rows, size_t num_rows)
   auto& pages        = _file_itm_data.pages_info;
   auto& page_nesting = _file_itm_data.page_nesting_info;
 
-  auto is_dict_chunk = [](const gpu::ColumnChunkDesc& chunk) {
-    return (chunk.data_type & 0x7) == BYTE_ARRAY && chunk.num_dict_pages > 0;
-  };
-
-  // Count the number of string dictionary entries
-  // NOTE: Assumes first page in the chunk is always the dictionary page
-  size_t total_str_dict_indexes = 0;
-  for (size_t c = 0, page_count = 0; c < chunks.size(); c++) {
-    if (is_dict_chunk(chunks[c])) { total_str_dict_indexes += pages[page_count].num_input_values; }
-    page_count += chunks[c].max_num_pages;
-  }
-
-  // Build index for string dictionaries since they can't be indexed
-  // directly due to variable-sized elements
-  auto str_dict_index = cudf::detail::make_zeroed_device_uvector_async<string_index_pair>(
-    total_str_dict_indexes, _stream);
+  // Should not reach here if there is no page data.
+  CUDF_EXPECTS(pages.size() > 0, "There is no page to decode");
 
   size_t const sum_max_depths = std::accumulate(
     chunks.begin(), chunks.end(), 0, [&](size_t cursum, gpu::ColumnChunkDesc const& chunk) {
@@ -58,16 +44,11 @@ void reader::impl::decode_page_data(size_t skip_rows, size_t num_rows)
   auto chunk_offsets       = std::vector<size_t>();
 
   // Update chunks with pointers to column data.
-  for (size_t c = 0, page_count = 0, str_ofs = 0, chunk_off = 0; c < chunks.size(); c++) {
+  for (size_t c = 0, page_count = 0, chunk_off = 0; c < chunks.size(); c++) {
     input_column_info const& input_col = _input_columns[chunks[c].src_col_index];
     CUDF_EXPECTS(input_col.schema_idx == chunks[c].src_col_schema,
                  "Column/page schema index mismatch");
 
-    if (is_dict_chunk(chunks[c])) {
-      chunks[c].str_dict_index = str_dict_index.data() + str_ofs;
-      str_ofs += pages[page_count].num_input_values;
-    }
-
     size_t max_depth = _metadata->get_output_nesting_depth(chunks[c].src_col_schema);
     chunk_offsets.push_back(chunk_off);
 
@@ -139,18 +120,15 @@ void reader::impl::decode_page_data(size_t skip_rows, size_t num_rows)
   chunk_nested_valids.host_to_device(_stream);
   chunk_nested_data.host_to_device(_stream);
 
-  if (total_str_dict_indexes > 0) {
-    gpu::BuildStringDictionaryIndex(chunks.device_ptr(), chunks.size(), _stream);
-  }
-
   gpu::DecodePageData(pages, chunks, num_rows, skip_rows, _stream);
+
   pages.device_to_host(_stream);
   page_nesting.device_to_host(_stream);
   _stream.synchronize();
 
   // for list columns, add the final offset to every offset buffer.
   // TODO : make this happen in more efficiently. Maybe use thrust::for_each
-  // on each buffer.  Or potentially do it in PreprocessColumnData
+  // on each buffer.
   // Note : the reason we are doing this here instead of in the decode kernel is
   // that it is difficult/impossible for a given page to know that it is writing the very
   // last value that should then be followed by a terminator (because rows can span
@@ -211,7 +189,20 @@ reader::impl::impl(std::vector<std::unique_ptr<datasource>>&& sources,
                    parquet_reader_options const& options,
                    rmm::cuda_stream_view stream,
                    rmm::mr::device_memory_resource* mr)
-  : _stream(stream), _mr(mr), _sources(std::move(sources))
+  : impl(0 /*chunk_read_limit*/,
+         std::move(sources),  // `sources` is a named rvalue reference, so a plain move suffices
+         options,
+         stream,
+         mr)
+{
+}
+
+reader::impl::impl(std::size_t chunk_read_limit,
+                   std::vector<std::unique_ptr<datasource>>&& sources,
+                   parquet_reader_options const& options,
+                   rmm::cuda_stream_view stream,
+                   rmm::mr::device_memory_resource* mr)
+  : _stream{stream}, _mr{mr}, _sources{std::move(sources)}, _chunk_read_limit{chunk_read_limit}
 {
   // Open and parse the source dataset metadata
   _metadata = std::make_unique<aggregate_reader_metadata>(_sources);
@@ -233,6 +224,14 @@ reader::impl::impl(std::vector<std::unique_ptr<datasource>>&& sources,
                               options.is_enabled_use_pandas_metadata(),
                               _strings_to_categorical,
                               _timestamp_type.id());
+
+  // Save the states of the output buffers for reuse in `read_chunk()`.
+  // Don't need to do it if we read the file all at once.
+  if (_chunk_read_limit > 0) {
+    for (auto const& buff : _output_buffers) {
+      _output_buffers_template.emplace_back(column_buffer::empty_like(buff));
+    }
+  }
 }
 
 void reader::impl::prepare_data(size_type skip_rows,
@@ -240,39 +239,61 @@ void reader::impl::prepare_data(size_type skip_rows,
                                 bool uses_custom_row_bounds,
                                 host_span<std::vector<size_type> const> row_group_indices)
 {
+  if (_file_preprocessed) { return; }
+
   const auto [skip_rows_corrected, num_rows_corrected, row_groups_info] =
     _metadata->select_row_groups(row_group_indices, skip_rows, num_rows);
-  _skip_rows = skip_rows_corrected;
-  _num_rows  = num_rows_corrected;
 
   if (num_rows_corrected > 0 && row_groups_info.size() != 0 && _input_columns.size() != 0) {
     load_and_decompress_data(row_groups_info, num_rows_corrected);
+    preprocess_pages(
+      skip_rows_corrected, num_rows_corrected, uses_custom_row_bounds, _chunk_read_limit);
+
+    if (_chunk_read_limit == 0) {  // read the whole file at once
+      CUDF_EXPECTS(_chunk_read_info.size() == 1,
+                   "Reading the whole file should yield only one chunk.");
+    }
   }
+
+  _file_preprocessed = true;
 }
 
 table_with_metadata reader::impl::read_chunk_internal(bool uses_custom_row_bounds)
 {
-  auto out_metadata = table_metadata{};
+  // If `_output_metadata` has been constructed, just copy it over.
+  auto out_metadata = _output_metadata ? table_metadata{*_output_metadata} : table_metadata{};
 
   // output cudf columns as determined by the top level schema
   auto out_columns = std::vector<std::unique_ptr<column>>{};
   out_columns.reserve(_output_buffers.size());
 
-  if (_num_rows == 0) { return finalize_output(out_metadata, out_columns); }
+  if (!has_next() || _chunk_read_info.size() == 0) {
+    return finalize_output(out_metadata, out_columns);
+  }
 
-  allocate_columns(_skip_rows, _num_rows, uses_custom_row_bounds);
+  auto const& read_info = _chunk_read_info[_current_read_chunk++];
 
-  decode_page_data(_skip_rows, _num_rows);
+  // Allocate memory buffers for the output columns.
+  allocate_columns(read_info.skip_rows, read_info.num_rows, uses_custom_row_bounds);
 
-  // Create the final output cudf columns
+  // Parse data into the output buffers.
+  decode_page_data(read_info.skip_rows, read_info.num_rows);
+
+  // Create the final output cudf columns.
   for (size_t i = 0; i < _output_buffers.size(); ++i) {
-    auto const metadata        = _reader_column_schema.has_value()
-                                   ? std::make_optional<reader_column_schema>((*_reader_column_schema)[i])
-                                   : std::nullopt;
-    column_name_info& col_name = out_metadata.schema_info.emplace_back("");
-    out_columns.emplace_back(make_column(_output_buffers[i], &col_name, metadata, _stream, _mr));
+    auto const metadata = _reader_column_schema.has_value()
+                            ? std::make_optional<reader_column_schema>((*_reader_column_schema)[i])
+                            : std::nullopt;
+    // Only construct `out_metadata` if `_output_metadata` has not been cached.
+    if (!_output_metadata) {
+      column_name_info& col_name = out_metadata.schema_info.emplace_back("");
+      out_columns.emplace_back(make_column(_output_buffers[i], &col_name, metadata, _stream, _mr));
+    } else {
+      out_columns.emplace_back(make_column(_output_buffers[i], nullptr, metadata, _stream, _mr));
+    }
   }
 
+  // Add empty columns if needed.
   return finalize_output(out_metadata, out_columns);
 }
 
@@ -281,21 +302,30 @@ table_with_metadata reader::impl::finalize_output(table_metadata& out_metadata,
 {
   // Create empty columns as needed (this can happen if we've ended up with no actual data to read)
   for (size_t i = out_columns.size(); i < _output_buffers.size(); ++i) {
-    column_name_info& col_name = out_metadata.schema_info.emplace_back("");
-    out_columns.emplace_back(io::detail::empty_like(_output_buffers[i], &col_name, _stream, _mr));
+    if (!_output_metadata) {
+      column_name_info& col_name = out_metadata.schema_info.emplace_back("");
+      out_columns.emplace_back(io::detail::empty_like(_output_buffers[i], &col_name, _stream, _mr));
+    } else {
+      out_columns.emplace_back(io::detail::empty_like(_output_buffers[i], nullptr, _stream, _mr));
+    }
   }
 
-  // Return column names (must match order of returned columns)
-  out_metadata.column_names.resize(_output_buffers.size());
-  for (size_t i = 0; i < _output_column_schemas.size(); i++) {
-    auto const& schema           = _metadata->get_schema(_output_column_schemas[i]);
-    out_metadata.column_names[i] = schema.name;
-  }
+  if (!_output_metadata) {
+    // Return column names (must match order of returned columns)
+    out_metadata.column_names.resize(_output_buffers.size());
+    for (size_t i = 0; i < _output_column_schemas.size(); i++) {
+      auto const& schema           = _metadata->get_schema(_output_column_schemas[i]);
+      out_metadata.column_names[i] = schema.name;
+    }
 
-  // Return user metadata
-  out_metadata.per_file_user_data = _metadata->get_key_value_metadata();
-  out_metadata.user_data          = {out_metadata.per_file_user_data[0].begin(),
-                            out_metadata.per_file_user_data[0].end()};
+    // Return user metadata
+    out_metadata.per_file_user_data = _metadata->get_key_value_metadata();
+    out_metadata.user_data          = {out_metadata.per_file_user_data[0].begin(),
+                              out_metadata.per_file_user_data[0].end()};
+
+    // Finally, save the output table metadata into `_output_metadata` for reuse next time.
+    _output_metadata = std::make_unique<table_metadata>(out_metadata);
+  }
 
   return {std::make_unique<table>(std::move(out_columns)), std::move(out_metadata)};
 }
@@ -305,8 +335,36 @@ table_with_metadata reader::impl::read(size_type skip_rows,
                                        bool uses_custom_row_bounds,
                                        host_span<std::vector<size_type> const> row_group_indices)
 {
+  CUDF_EXPECTS(_chunk_read_limit == 0, "Reading the whole file must not have non-zero byte_limit.");
   prepare_data(skip_rows, num_rows, uses_custom_row_bounds, row_group_indices);
   return read_chunk_internal(uses_custom_row_bounds);
 }
 
+table_with_metadata reader::impl::read_chunk()
+{
+  // Chunked mode only: rebuild the output buffers from the templates saved at construction.
+  if (_chunk_read_limit != 0) {
+    _output_buffers.clear();
+    for (auto const& saved : _output_buffers_template) {
+      _output_buffers.emplace_back(column_buffer::empty_like(saved));
+    }
+  }
+
+  auto constexpr custom_bounds = true;
+  prepare_data(/*skip_rows=*/0,
+               /*num_rows=*/-1,  // `-1` means unlimited
+               /*uses_custom_row_bounds=*/custom_bounds,
+               /*row_group_indices=*/{});  // empty means read all row groups
+  return read_chunk_internal(custom_bounds);
+}
+
+bool reader::impl::has_next()
+{
+  prepare_data(/*skip_rows=*/0,
+               /*num_rows=*/-1,  // `-1` means unlimited
+               /*uses_custom_row_bounds=*/true,
+               /*row_group_indices=*/{});  // empty means read all row groups
+  return _chunk_read_info.size() > _current_read_chunk;  // true while unread chunks remain
+}
+
 }  // namespace cudf::io::detail::parquet
diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp
index b53487c824b..6d42e9fab84 100644
--- a/cpp/src/io/parquet/reader_impl.hpp
+++ b/cpp/src/io/parquet/reader_impl.hpp
@@ -38,7 +38,6 @@
 #include <vector>
 
 namespace cudf::io::detail::parquet {
-
 /**
  * @brief Implementation for Parquet reader
  */
@@ -47,6 +46,9 @@ class reader::impl {
   /**
    * @brief Constructor from an array of dataset sources with reader options.
    *
+   * By using this constructor, each call to `read()` or `read_chunk()` will read the entire
+   * given file.
+   *
    * @param sources Dataset sources
    * @param options Settings for controlling reading behavior
    * @param stream CUDA stream used for device memory operations and kernel launches
@@ -73,6 +75,46 @@ class reader::impl {
                            bool uses_custom_row_bounds,
                            host_span<std::vector<size_type> const> row_group_indices);
 
+  /**
+   * @brief Constructor from a chunk read limit and an array of dataset sources with reader options.
+   *
+   * By using this constructor, the reader will support iterative (chunked) reading through
+   * `has_next()` and `read_chunk()`. For example:
+   * ```
+   *  do {
+   *    auto const chunk = reader.read_chunk();
+   *    // Process chunk
+   *  } while (reader.has_next());
+   *
+   * ```
+   *
+   * Reading the whole given file at once through `read()` function is still supported if
+   * `chunk_read_limit == 0` (i.e., no reading limit).
+   * In such case, `read_chunk()` will also return rows of the entire file.
+   *
+   * @param chunk_read_limit Limit on total number of bytes to be returned per read,
+   *        or `0` if there is no limit
+   * @param sources Dataset sources
+   * @param options Settings for controlling reading behavior
+   * @param stream CUDA stream used for device memory operations and kernel launches
+   * @param mr Device memory resource to use for device memory allocation
+   */
+  explicit impl(std::size_t chunk_read_limit,
+                std::vector<std::unique_ptr<datasource>>&& sources,
+                parquet_reader_options const& options,
+                rmm::cuda_stream_view stream,
+                rmm::mr::device_memory_resource* mr);
+
+  /**
+   * @copydoc cudf::io::chunked_parquet_reader::has_next
+   */
+  bool has_next();
+
+  /**
+   * @copydoc cudf::io::chunked_parquet_reader::read_chunk
+   */
+  table_with_metadata read_chunk();
+
  private:
   /**
    * @brief Perform the necessary data preprocessing for parsing file later on.
@@ -94,6 +136,29 @@ class reader::impl {
   void load_and_decompress_data(std::vector<row_group_info> const& row_groups_info,
                                 size_type num_rows);
 
+  /**
+   * @brief Perform some preprocessing for page data and also compute the split locations
+   * {skip_rows, num_rows} for chunked reading.
+   *
+   * There are several pieces of information we can't compute directly from row counts in
+   * the parquet headers when dealing with nested schemas:
+   * - The total sizes of all output columns at all nesting levels
+   * - The starting output buffer offset for each page, for each nesting level
+   *
+   * For flat schemas, these values are computed during header decoding (see gpuDecodePageHeaders).
+   *
+   * @param skip_rows Crop all rows below skip_rows
+   * @param num_rows Maximum number of rows to read
+   * @param uses_custom_row_bounds Whether or not num_rows and skip_rows represent user-specified
+   *        bounds
+   * @param chunk_read_limit Limit on total number of bytes to be returned per read,
+   *        or `0` if there is no limit
+   */
+  void preprocess_pages(size_t skip_rows,
+                        size_t num_rows,
+                        bool uses_custom_row_bounds,
+                        size_t chunk_read_limit);
+
   /**
    * @brief Allocate nesting information storage for all pages and set pointers to it.
    *
@@ -158,17 +223,26 @@ class reader::impl {
   // Buffers for generating output columns
   std::vector<column_buffer> _output_buffers;
 
+  // Buffers copied from `_output_buffers` after construction for reuse
+  std::vector<column_buffer> _output_buffers_template;
+
   // _output_buffers associated schema indices
   std::vector<int> _output_column_schemas;
 
+  // _output_buffers associated metadata
+  std::unique_ptr<table_metadata> _output_metadata;
+
   bool _strings_to_categorical = false;
   std::optional<std::vector<reader_column_schema>> _reader_column_schema;
   data_type _timestamp_type{type_id::EMPTY};
 
+  // Variables used for chunked reading:
   cudf::io::parquet::gpu::file_intermediate_data _file_itm_data;
-
-  size_type _skip_rows{0};
-  size_type _num_rows{0};
+  cudf::io::parquet::gpu::chunk_intermediate_data _chunk_itm_data;
+  std::vector<cudf::io::parquet::gpu::chunk_read_info> _chunk_read_info;
+  std::size_t _chunk_read_limit{0};
+  std::size_t _current_read_chunk{0};
+  bool _file_preprocessed{false};
 };
 
 }  // namespace cudf::io::detail::parquet
diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu
index ca2009d3c74..38fce7d3263 100644
--- a/cpp/src/io/parquet/reader_impl_preprocess.cu
+++ b/cpp/src/io/parquet/reader_impl_preprocess.cu
@@ -20,17 +20,30 @@
 #include <io/utilities/config_utils.hpp>
 #include <io/utilities/time_utils.cuh>
 
+#include <cudf/detail/iterator.cuh>
+#include <cudf/detail/utilities/integer_utils.hpp>
 #include <cudf/detail/utilities/vector_factories.hpp>
 
 #include <rmm/exec_policy.hpp>
 
+#include <thrust/binary_search.h>
 #include <thrust/fill.h>
+#include <thrust/functional.h>
+#include <thrust/iterator/constant_iterator.h>
+#include <thrust/iterator/discard_iterator.h>
+#include <thrust/iterator/iterator_categories.h>
+#include <thrust/iterator/transform_iterator.h>
 #include <thrust/logical.h>
+#include <thrust/reduce.h>
+#include <thrust/scan.h>
+#include <thrust/sequence.h>
+#include <thrust/sort.h>
+#include <thrust/transform.h>
+#include <thrust/unique.h>
 
 #include <numeric>
 
 namespace cudf::io::detail::parquet {
-
 namespace {
 
 /**
@@ -157,7 +170,7 @@ void generate_depth_remappings(std::map<int, std::pair<std::vector<int>, std::ve
 }
 
 /**
- * @brief Function that returns the required the number of bits to store a value
+ * @brief Return the required number of bits to store a value.
  */
 template <typename T = uint8_t>
 [[nodiscard]] T required_bits(uint32_t max_level)
@@ -197,7 +210,7 @@ template <typename T = uint8_t>
 }
 
 /**
- * @brief Reads compressed page data to device memory
+ * @brief Reads compressed page data to device memory.
  *
  * @param sources Dataset sources
  * @param page_data Buffers to hold compressed page data for each chunk
@@ -606,6 +619,9 @@ void reader::impl::allocate_nesting_info()
           pni[cur_depth].max_def_level = cur_schema.max_definition_level;
           pni[cur_depth].max_rep_level = cur_schema.max_repetition_level;
           pni[cur_depth].size          = 0;
+          pni[cur_depth].type =
+            to_type_id(cur_schema, _strings_to_categorical, _timestamp_type.id());
+          pni[cur_depth].nullable = cur_schema.repetition_type == OPTIONAL;
         }
 
         // move up the hierarchy
@@ -721,6 +737,7 @@ void reader::impl::load_and_decompress_data(std::vector<row_group_info> const& r
   for (auto& task : read_rowgroup_tasks) {
     task.wait();
   }
+
   CUDF_EXPECTS(remaining_rows <= 0, "All rows data must be read.");
 
   // Process dataset chunk pages into output columns
@@ -762,11 +779,669 @@ void reader::impl::load_and_decompress_data(std::vector<row_group_info> const& r
   }
 }
 
-void reader::impl::allocate_columns(size_t min_row, size_t total_rows, bool uses_custom_row_bounds)
+namespace {
+
+struct cumulative_row_info {
+  size_t row_count;   // cumulative row count
+  size_t size_bytes;  // cumulative size in bytes
+  int key;            // schema index
+};
+
+#if defined(PREPROCESS_DEBUG)
+void print_pages(hostdevice_vector<gpu::PageInfo>& pages, rmm::cuda_stream_view _stream)
+{
+  pages.device_to_host(_stream, true);
+  for (size_t idx = 0; idx < pages.size(); idx++) {
+    auto const& p = pages[idx];
+    // skip dictionary pages
+    if (p.flags & gpu::PAGEINFO_FLAGS_DICTIONARY) { continue; }
+    printf(
+      "P(%lu, s:%d): chunk_row(%d), num_rows(%d), skipped_values(%d), skipped_leaf_values(%d)\n",
+      idx,
+      p.src_col_schema,
+      p.chunk_row,
+      p.num_rows,
+      p.skipped_values,
+      p.skipped_leaf_values);
+  }
+}
+
+void print_cumulative_page_info(hostdevice_vector<gpu::PageInfo>& pages,
+                                rmm::device_uvector<int32_t> const& page_index,
+                                rmm::device_uvector<cumulative_row_info> const& c_info,
+                                rmm::cuda_stream_view stream)
+{
+  pages.device_to_host(stream, true);
+
+  printf("------------\nCumulative sizes by page\n");
+
+  std::vector<int> schemas(pages.size());
+  std::vector<int> h_page_index(pages.size());
+  cudaMemcpy(
+    h_page_index.data(), page_index.data(), sizeof(int) * pages.size(), cudaMemcpyDeviceToHost);
+  std::vector<cumulative_row_info> h_cinfo(pages.size());
+  cudaMemcpy(h_cinfo.data(),
+             c_info.data(),
+             sizeof(cumulative_row_info) * pages.size(),
+             cudaMemcpyDeviceToHost);
+  auto schema_iter = cudf::detail::make_counting_transform_iterator(
+    0, [&](size_type i) { return pages[h_page_index[i]].src_col_schema; });
+  thrust::copy(thrust::seq, schema_iter, schema_iter + pages.size(), schemas.begin());
+  auto last = thrust::unique(thrust::seq, schemas.begin(), schemas.end());
+  schemas.resize(last - schemas.begin());
+  printf("Num schemas: %lu\n", schemas.size());
+
+  for (size_t idx = 0; idx < schemas.size(); idx++) {
+    printf("Schema %d\n", schemas[idx]);
+    for (size_t pidx = 0; pidx < pages.size(); pidx++) {
+      auto const& page = pages[h_page_index[pidx]];
+      if (page.flags & gpu::PAGEINFO_FLAGS_DICTIONARY || page.src_col_schema != schemas[idx]) {
+        continue;
+      }
+      printf("\tP: {%lu, %lu}\n", h_cinfo[pidx].row_count, h_cinfo[pidx].size_bytes);
+    }
+  }
+}
+
+void print_cumulative_row_info(
+  host_span<cumulative_row_info const> sizes,
+  std::string const& label,
+  std::optional<std::vector<gpu::chunk_read_info>> splits = std::nullopt)
+{
+  if (splits.has_value()) {
+    printf("------------\nSplits\n");
+    for (size_t idx = 0; idx < splits->size(); idx++) {
+      printf("{%lu, %lu}\n", splits.value()[idx].skip_rows, splits.value()[idx].num_rows);
+    }
+  }
+
+  printf("------------\nCumulative sizes %s\n", label.c_str());
+  for (size_t idx = 0; idx < sizes.size(); idx++) {
+    printf("{%lu, %lu, %d}", sizes[idx].row_count, sizes[idx].size_bytes, sizes[idx].key);
+    if (splits.has_value()) {
+      // if we have a split at this row count and this is the last instance of this row count
+      auto start = thrust::make_transform_iterator(
+        splits->begin(), [](gpu::chunk_read_info const& i) { return i.skip_rows; });
+      auto end               = start + splits->size();
+      auto split             = std::find(start, end, sizes[idx].row_count);
+      auto const split_index = [&]() -> int {
+        if (split != end &&
+            ((idx == sizes.size() - 1) || (sizes[idx + 1].row_count > sizes[idx].row_count))) {
+          return static_cast<int>(std::distance(start, split));
+        }
+        return idx == 0 ? 0 : -1;
+      }();
+      if (split_index >= 0) {
+        printf(" <-- split {%lu, %lu}",
+               splits.value()[split_index].skip_rows,
+               splits.value()[split_index].num_rows);
+      }
+    }
+    printf("\n");
+  }
+}
+#endif  // PREPROCESS_DEBUG
+
+/**
+ * @brief Functor which reduces two cumulative_row_info structs of the same key.
+ */
+struct cumulative_row_sum {
+  cumulative_row_info operator()
+    __device__(cumulative_row_info const& lhs, cumulative_row_info const& rhs) const
+  {
+    return {lhs.row_count + rhs.row_count, lhs.size_bytes + rhs.size_bytes, lhs.key};
+  }
+};
+
+/**
+ * @brief Functor which computes the total data size for a given type of cudf column.
+ *
+ * In the case of strings, the return size does not include the chars themselves. That
+ * information is tracked separately (see PageInfo::str_bytes).
+ */
+struct row_size_functor {
+  __device__ size_t validity_size(size_t num_rows, bool nullable)
+  {
+    if (!nullable) { return 0; }  // no validity mask to account for
+    return cudf::util::div_rounding_up_safe(num_rows, size_t{32}) * 4;  // 32 rows per 4-byte word
+  }
+
+  template <typename T>
+  __device__ size_t operator()(size_t num_rows, bool nullable)
+  {
+    return (sizeof(device_storage_type_t<T>) * num_rows) + validity_size(num_rows, nullable);
+  }
+};
+
+template <>
+__device__ size_t row_size_functor::operator()<list_view>(size_t num_rows, bool nullable)
+{
+  auto const offset_size = sizeof(offset_type);
+  // NOTE: Adding the + 1 offset here isn't strictly correct.  There will only be 1 extra offset
+  // for the entire column, whereas this is adding an extra offset per page.  So we will get a
+  // small over-estimate of the real size of the order :  # of pages * 4 bytes. It seems better
+  // to overestimate size somewhat than to underestimate it and potentially generate chunks
+  // that are too large.
+  return (offset_size * (num_rows + 1)) + validity_size(num_rows, nullable);
+}
+
+template <>
+__device__ size_t row_size_functor::operator()<struct_view>(size_t num_rows, bool nullable)
+{
+  return validity_size(num_rows, nullable);
+}
+
+template <>
+__device__ size_t row_size_functor::operator()<string_view>(size_t num_rows, bool nullable)
+{
+  // only returns the size of offsets and validity. the size of the actual string chars
+  // is tracked separately.
+  auto const offset_size = sizeof(offset_type);
+  // see note about offsets in the list_view template.
+  return (offset_size * (num_rows + 1)) + validity_size(num_rows, nullable);
+}
+
+/**
+ * @brief Functor which computes the total output cudf data size for all of
+ * the data in this page.
+ *
+ * Sums across all nesting levels.
+ */
+struct get_cumulative_row_info {
+  gpu::PageInfo const* const pages;  // page array; dereferenced in device code
+
+  __device__ cumulative_row_info operator()(size_type index)
+  {
+    auto const& page = pages[index];
+    if (page.flags & gpu::PAGEINFO_FLAGS_DICTIONARY) {
+      return cumulative_row_info{0, 0, page.src_col_schema};  // dictionary pages count as empty
+    }
+
+    // total nested size, not counting string data. `page` is captured by reference: it outlives
+    auto iter =  // the iterator, which is consumed by the reduce below.
+      cudf::detail::make_counting_transform_iterator(0, [&page] __device__(size_type i) {
+        auto const& pni = page.nesting[i];
+        return cudf::type_dispatcher(
+          data_type{pni.type}, row_size_functor{}, pni.size, pni.nullable);
+      });
+    // the row count is taken from nesting level 0; string bytes are added to the summed sizes
+    size_t const row_count = static_cast<size_t>(page.nesting[0].size);
+    return {row_count,
+            thrust::reduce(thrust::seq, iter, iter + page.num_nesting_levels) + page.str_bytes,
+            page.src_col_schema};
+  }
+};
+
+/**
+ * @brief Functor which computes the effective size of all input columns by page.
+ *
+ * For a given row, we want to find the cost of all pages for all columns involved
+ * in loading up to that row.  The complication here is that not all pages are the
+ * same size between columns. Example:
+ *
+ *              page row counts
+ * Column A:    0 <----> 100 <----> 200
+ * Column B:    0 <---------------> 200 <--------> 400
+ *                        |
+ * if we decide to split at row 100, we don't really know the actual amount of bytes in column B
+ * at that point.  So we have to proceed as if we are taking the bytes from all 200 rows of that
+ * page. Essentially, a conservative over-estimate of the real size.
+ */
+struct row_total_size {
+  cumulative_row_info const* c_info;
+  size_type const* key_offsets;
+  size_t num_keys;
+
+  __device__ cumulative_row_info operator()(cumulative_row_info const& i)
+  {
+    // sum sizes for each input column at this row; `counts` views each entry's row count
+    auto const counts = cudf::detail::make_counting_transform_iterator(
+      0, [&] __device__(size_type pos) { return c_info[pos].row_count; });
+    size_t sum = 0;
+    for (size_t idx = 0; idx < num_keys; idx++) {
+      auto const start = key_offsets[idx];
+      auto const end   = key_offsets[idx + 1];
+      auto const page_index =  // first entry of this key whose row count is >= i.row_count
+        thrust::lower_bound(thrust::seq, counts + start, counts + end, i.row_count) - counts;
+      sum += c_info[page_index].size_bytes;
+    }
+    return {i.row_count, sum, i.key};
+  }
+};
+
+/**
+ * @brief Given a vector of cumulative {row_count, byte_size} pairs and a chunk read
+ * limit, determine the set of splits.
+ *
+ * @param sizes Vector of cumulative {row_count, byte_size} pairs
+ * @param num_rows Total number of rows to read
+ * @param chunk_read_limit Limit on total number of bytes to be returned per read, for all columns
+ */
+std::vector<gpu::chunk_read_info> find_splits(std::vector<cumulative_row_info> const& sizes,
+                                              size_t num_rows,
+                                              size_t chunk_read_limit)
+{
+  // `sizes` is an array of cumulative {row_count, output bytes} entries. just walk through it
+  // and generate splits.
+  // TODO: come up with a clever way to do this entirely in parallel. For now, as long as batch
+  // sizes are reasonably large, this shouldn't iterate too many times
+  std::vector<gpu::chunk_read_info> splits;
+  {
+    size_t cur_pos             = 0;  // position in `sizes` where the previous split ended
+    size_t cur_cumulative_size = 0;  // cumulative bytes already covered by previous splits
+    size_t cur_row_count       = 0;  // cumulative rows already covered by previous splits
+    auto start = thrust::make_transform_iterator(sizes.begin(), [&](cumulative_row_info const& i) {
+      return i.size_bytes - cur_cumulative_size;
+    });
+    auto end   = start + sizes.size();
+    while (cur_row_count < num_rows) {
+      int64_t split_pos =
+        thrust::lower_bound(thrust::seq, start + cur_pos, end, chunk_read_limit) - start;
+
+      // if we're past the end, or if the returned bucket is greater than the chunk_read_limit,
+      // move back one.
+      if (static_cast<size_t>(split_pos) >= sizes.size() ||
+          (sizes[split_pos].size_bytes - cur_cumulative_size > chunk_read_limit)) {
+        split_pos--;
+      }
+
+      // best effort: if nothing fits within the limit we must still make progress, so take a
+      // larger split. this is a loop because the cumulative sizes of every column's pages are
+      // sorted into one list; if two columns both have an entry {1000, 10000} it appears twice,
+      // so advance until the row count actually moves past `cur_row_count`. the alternative
+      // would be to call unique() on the input first.
+      while (split_pos < (static_cast<int64_t>(sizes.size()) - 1) &&
+             (split_pos < 0 || sizes[split_pos].row_count == cur_row_count)) {
+        split_pos++;
+      }
+
+      auto const start_row = cur_row_count;
+      cur_row_count        = sizes[split_pos].row_count;
+      splits.push_back(gpu::chunk_read_info{start_row, cur_row_count - start_row});
+      cur_pos             = split_pos;
+      cur_cumulative_size = sizes[split_pos].size_bytes;
+    }
+  }
+  // print_cumulative_row_info(sizes, "adjusted", splits);
+
+  return splits;
+}
+
+/**
+ * @brief Given a set of pages that have had their sizes computed by nesting level and
+ * a limit on total read size, generate a set of {skip_rows, num_rows} pairs representing
+ * a set of reads that will generate output columns of total size <= `chunk_read_limit` bytes.
+ *
+ * @param pages All pages in the file
+ * @param id Additional intermediate information required to process the pages
+ * @param num_rows Total number of rows to read
+ * @param chunk_read_limit Limit on total number of bytes to be returned per read, for all columns
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ */
+std::vector<gpu::chunk_read_info> compute_splits(hostdevice_vector<gpu::PageInfo>& pages,
+                                                 gpu::chunk_intermediate_data const& id,
+                                                 size_t num_rows,
+                                                 size_t chunk_read_limit,
+                                                 rmm::cuda_stream_view stream)
+{
+  auto const& page_keys  = id.page_keys;
+  auto const& page_index = id.page_index;
+
+  // generate cumulative row counts and sizes
+  rmm::device_uvector<cumulative_row_info> c_info(page_keys.size(), stream);
+  // convert PageInfo to cumulative_row_info
+  auto page_input = thrust::make_transform_iterator(page_index.begin(),
+                                                    get_cumulative_row_info{pages.device_ptr()});
+  thrust::inclusive_scan_by_key(rmm::exec_policy(stream),
+                                page_keys.begin(),
+                                page_keys.end(),
+                                page_input,
+                                c_info.begin(),
+                                thrust::equal_to{},
+                                cumulative_row_sum{});
+  // print_cumulative_page_info(pages, page_index, c_info, stream);
+
+  // sort by row count
+  rmm::device_uvector<cumulative_row_info> c_info_sorted{c_info, stream};
+  thrust::sort(rmm::exec_policy(stream),
+               c_info_sorted.begin(),
+               c_info_sorted.end(),
+               [] __device__(cumulative_row_info const& a, cumulative_row_info const& b) {
+                 return a.row_count < b.row_count;
+               });
+
+  std::vector<cumulative_row_info> h_c_info_sorted(c_info_sorted.size());
+  cudaMemcpy(h_c_info_sorted.data(),
+             c_info_sorted.data(),
+             sizeof(cumulative_row_info) * c_info_sorted.size(),
+             cudaMemcpyDeviceToHost);
+  // print_cumulative_row_info(h_c_info_sorted, "raw");
+
+  // generate key offsets (offsets to the start of each partition of keys). worst case is 1 page per
+  // key
+  rmm::device_uvector<size_type> key_offsets(page_keys.size() + 1, stream);
+  auto const key_offsets_end = thrust::reduce_by_key(rmm::exec_policy(stream),
+                                                     page_keys.begin(),
+                                                     page_keys.end(),
+                                                     thrust::make_constant_iterator(1),
+                                                     thrust::make_discard_iterator(),
+                                                     key_offsets.begin())
+                                 .second;
+  size_t const num_unique_keys = key_offsets_end - key_offsets.begin();
+  thrust::exclusive_scan(
+    rmm::exec_policy(stream), key_offsets.begin(), key_offsets.end(), key_offsets.begin());
+
+  // adjust the cumulative info such that for each row count, the size includes any pages that span
+  // that row count. this is so that if we have this case:
+  //              page row counts
+  // Column A:    0 <----> 100 <----> 200
+  // Column B:    0 <---------------> 200 <--------> 400
+  //                        |
+  // if we decide to split at row 100, we don't really know the actual amount of bytes in column B
+  // at that point.  So we have to proceed as if we are taking the bytes from all 200 rows of that
+  // page.
+  //
+  rmm::device_uvector<cumulative_row_info> aggregated_info(c_info.size(), stream);
+  thrust::transform(rmm::exec_policy(stream),
+                    c_info_sorted.begin(),
+                    c_info_sorted.end(),
+                    aggregated_info.begin(),
+                    row_total_size{c_info.data(), key_offsets.data(), num_unique_keys});
+
+  // bring back to the cpu
+  std::vector<cumulative_row_info> h_aggregated_info(aggregated_info.size());
+  cudaMemcpyAsync(h_aggregated_info.data(),
+                  aggregated_info.data(),
+                  sizeof(cumulative_row_info) * c_info.size(),
+                  cudaMemcpyDeviceToHost,
+                  stream);
+  stream.synchronize();
+
+  return find_splits(h_aggregated_info, num_rows, chunk_read_limit);
+}
+
+struct get_page_chunk_idx {
+  __device__ size_type operator()(gpu::PageInfo const& page) { return page.chunk_idx; }
+};
+
+struct get_page_num_rows {
+  __device__ size_type operator()(gpu::PageInfo const& page) { return page.num_rows; }
+};
+
+struct get_page_schema {
+  __device__ size_type operator()(gpu::PageInfo const& page) { return page.src_col_schema; }
+};
+
+/**
+ * @brief Returns the size field of a PageInfo struct for a given depth, keyed by schema.
+ */
+struct get_page_nesting_size {
+  size_type const src_col_schema;
+  size_type const depth;
+  gpu::PageInfo const* const pages;
+
+  __device__ size_type operator()(int index) const
+  {
+    auto const& page = pages[index];
+    if (page.src_col_schema != src_col_schema || page.flags & gpu::PAGEINFO_FLAGS_DICTIONARY) {
+      return 0;
+    }
+    return page.nesting[depth].batch_size;
+  }
+};
+
+/**
+ * @brief Writes to the chunk_row field of the PageInfo struct.
+ */
+struct chunk_row_output_iter {
+  gpu::PageInfo* p;
+  using value_type        = size_type;
+  using difference_type   = size_type;
+  using pointer           = size_type*;
+  using reference         = size_type&;
+  using iterator_category = thrust::output_device_iterator_tag;
+
+  __host__ __device__ chunk_row_output_iter operator+(int i)
+  {
+    return chunk_row_output_iter{p + i};
+  }
+
+  __host__ __device__ void operator++() { p++; }
+
+  __device__ reference operator[](int i) { return p[i].chunk_row; }
+  __device__ reference operator*() { return p->chunk_row; }
+};
+
+/**
+ * @brief Writes to the page_start_value field of the PageNestingInfo struct, keyed by schema.
+ */
+struct start_offset_output_iterator {
+  gpu::PageInfo* pages;
+  int const* page_indices;
+  int cur_index;
+  int src_col_schema;
+  int nesting_depth;
+  int empty               = 0;
+  using value_type        = size_type;
+  using difference_type   = size_type;
+  using pointer           = size_type*;
+  using reference         = size_type&;
+  using iterator_category = thrust::output_device_iterator_tag;
+
+  constexpr void operator=(start_offset_output_iterator const& other)
+  {
+    pages          = other.pages;
+    page_indices   = other.page_indices;
+    cur_index      = other.cur_index;
+    src_col_schema = other.src_col_schema;
+    nesting_depth  = other.nesting_depth;
+  }
+
+  constexpr start_offset_output_iterator operator+(int i)
+  {
+    return start_offset_output_iterator{
+      pages, page_indices, cur_index + i, src_col_schema, nesting_depth};
+  }
+
+  constexpr void operator++() { cur_index++; }
+
+  __device__ reference operator[](int i) { return dereference(cur_index + i); }
+  __device__ reference operator*() { return dereference(cur_index); }
+
+ private:
+  __device__ reference dereference(int index)
+  {
+    gpu::PageInfo const& p = pages[page_indices[index]];
+    if (p.src_col_schema != src_col_schema || p.flags & gpu::PAGEINFO_FLAGS_DICTIONARY) {
+      return empty;
+    }
+    return p.nesting[nesting_depth].page_start_value;
+  }
+};
+
+}  // anonymous namespace
+
+void reader::impl::preprocess_pages(size_t skip_rows,
+                                    size_t num_rows,
+                                    bool uses_custom_row_bounds,
+                                    size_t chunk_read_limit)
+{
+  auto& chunks = _file_itm_data.chunks;
+  auto& pages  = _file_itm_data.pages_info;
+
+  // iterate over all input columns and determine if they contain lists so we can further
+  // preprocess them.
+  bool has_lists = false;
+  for (size_t idx = 0; idx < _input_columns.size(); idx++) {
+    auto const& input_col  = _input_columns[idx];
+    size_t const max_depth = input_col.nesting_depth();
+
+    auto* cols = &_output_buffers;
+    for (size_t l_idx = 0; l_idx < max_depth; l_idx++) {
+      auto& out_buf = (*cols)[input_col.nesting[l_idx]];
+      cols          = &out_buf.children;
+
+      // if this has a list parent, we have to get column sizes from the
+      // data computed during gpu::ComputePageSizes
+      if (out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) {
+        has_lists = true;
+        break;
+      }
+    }
+    if (has_lists) { break; }
+  }
+
+  // generate string dict indices if necessary
+  {
+    auto is_dict_chunk = [](const gpu::ColumnChunkDesc& chunk) {
+      return (chunk.data_type & 0x7) == BYTE_ARRAY && chunk.num_dict_pages > 0;
+    };
+
+    // Count the number of string dictionary entries
+    // NOTE: Assumes first page in the chunk is always the dictionary page
+    size_t total_str_dict_indexes = 0;
+    for (size_t c = 0, page_count = 0; c < chunks.size(); c++) {
+      if (is_dict_chunk(chunks[c])) {
+        total_str_dict_indexes += pages[page_count].num_input_values;
+      }
+      page_count += chunks[c].max_num_pages;
+    }
+
+    // Build index for string dictionaries since they can't be indexed
+    // directly due to variable-sized elements
+    _chunk_itm_data.str_dict_index =
+      cudf::detail::make_zeroed_device_uvector_async<string_index_pair>(total_str_dict_indexes,
+                                                                        _stream);
+
+    // Update chunks with pointers to string dict indices
+    for (size_t c = 0, page_count = 0, str_ofs = 0; c < chunks.size(); c++) {
+      input_column_info const& input_col = _input_columns[chunks[c].src_col_index];
+      CUDF_EXPECTS(input_col.schema_idx == chunks[c].src_col_schema,
+                   "Column/page schema index mismatch");
+      if (is_dict_chunk(chunks[c])) {
+        chunks[c].str_dict_index = _chunk_itm_data.str_dict_index.data() + str_ofs;
+        str_ofs += pages[page_count].num_input_values;
+      }
+
+      // column_data_base will always point to leaf data, even for nested types.
+      page_count += chunks[c].max_num_pages;
+    }
+
+    if (total_str_dict_indexes > 0) {
+      chunks.host_to_device(_stream);
+      gpu::BuildStringDictionaryIndex(chunks.device_ptr(), chunks.size(), _stream);
+    }
+  }
+
+  // intermediate data we will need for further chunked reads
+  if (has_lists || chunk_read_limit > 0) {
+    // computes:
+    // PageNestingInfo::num_rows for each page. the true number of rows (taking repetition into
+    // account), not just the number of values. PageNestingInfo::size for each level of nesting, for
+    // each page.
+    //
+    // we will be applying a later "trim" pass if skip_rows/num_rows is being used, which can happen
+    // if:
+    // - user has passed custom row bounds
+    // - we will be doing a chunked read
+    gpu::ComputePageSizes(pages,
+                          chunks,
+                          0,  // 0-max size_t. process all possible rows
+                          std::numeric_limits<size_t>::max(),
+                          true,                  // compute num_rows
+                          chunk_read_limit > 0,  // compute string sizes
+                          _stream);
+
+    // computes:
+    // PageInfo::chunk_row (the absolute start row index) for all pages
+    // Note: this is doing some redundant work for pages in flat hierarchies.  chunk_row has already
+    // been computed during header decoding. the overall amount of work here is very small though.
+    auto key_input  = thrust::make_transform_iterator(pages.device_ptr(), get_page_chunk_idx{});
+    auto page_input = thrust::make_transform_iterator(pages.device_ptr(), get_page_num_rows{});
+    thrust::exclusive_scan_by_key(rmm::exec_policy(_stream),
+                                  key_input,
+                                  key_input + pages.size(),
+                                  page_input,
+                                  chunk_row_output_iter{pages.device_ptr()});
+
+    // compute page ordering.
+    //
+    // ordering of pages is by input column schema, repeated across row groups.  so
+    // if we had 3 columns, each with 2 pages, and 1 row group, our schema values might look like
+    //
+    // 1, 1, 2, 2, 3, 3
+    //
+    // However, if we had more than one row group, the pattern would be
+    //
+    // 1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3
+    // ^ row group 0     |
+    //                   ^ row group 1
+    //
+    // To use exclusive_scan_by_key, the ordering we actually want is
+    //
+    // 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3
+    //
+    // We also need to preserve key-relative page ordering, so we need to use a stable sort.
+    _chunk_itm_data.page_keys  = rmm::device_uvector<int>(pages.size(), _stream);
+    _chunk_itm_data.page_index = rmm::device_uvector<int>(pages.size(), _stream);
+    auto& page_keys            = _chunk_itm_data.page_keys;
+    auto& page_index           = _chunk_itm_data.page_index;
+    {
+      thrust::transform(rmm::exec_policy(_stream),
+                        pages.device_ptr(),
+                        pages.device_ptr() + pages.size(),
+                        page_keys.begin(),
+                        get_page_schema{});
+
+      thrust::sequence(rmm::exec_policy(_stream), page_index.begin(), page_index.end());
+      thrust::stable_sort_by_key(rmm::exec_policy(_stream),
+                                 page_keys.begin(),
+                                 page_keys.end(),
+                                 page_index.begin(),
+                                 thrust::less<int>());
+    }
+
+    // retrieve pages back
+    pages.device_to_host(_stream, true);
+
+#if defined(PREPROCESS_DEBUG)
+    print_pages(pages, _stream);
+#endif
+  }
+
+  // compute splits if necessary. otherwise return a single split representing
+  // the whole file.
+  _chunk_read_info = chunk_read_limit > 0
+                       ? compute_splits(pages, _chunk_itm_data, num_rows, chunk_read_limit, _stream)
+                       : std::vector<gpu::chunk_read_info>{{skip_rows, num_rows}};
+}
+
+void reader::impl::allocate_columns(size_t skip_rows, size_t num_rows, bool uses_custom_row_bounds)
 {
   auto const& chunks = _file_itm_data.chunks;
   auto& pages        = _file_itm_data.pages_info;
 
+  // Should not reach here if there is no page data.
+  CUDF_EXPECTS(pages.size() > 0, "There is no page to parse");
+
+  // computes:
+  // PageNestingInfo::batch_size for each level of nesting, for each page, taking row bounds into
+  // account. PageInfo::skipped_values, which tells us where to start decoding in the input to
+  // respect the user bounds. It is only necessary to do this second pass if uses_custom_row_bounds
+  // is set (if the user has specified artificial bounds).
+  if (uses_custom_row_bounds) {
+    gpu::ComputePageSizes(pages,
+                          chunks,
+                          skip_rows,
+                          num_rows,
+                          false,  // num_rows is already computed
+                          false,  // no need to compute string sizes
+                          _stream);
+#if defined(PREPROCESS_DEBUG)
+    print_pages(pages, _stream);
+#endif
+  }
+
   // iterate over all input columns and allocate any associated output
   // buffers if they are not part of a list hierarchy. mark down
   // if we have any list columns that need further processing.
@@ -780,8 +1455,8 @@ void reader::impl::allocate_columns(size_t min_row, size_t total_rows, bool uses
       auto& out_buf = (*cols)[input_col.nesting[l_idx]];
       cols          = &out_buf.children;
 
-      // if this has a list parent, we will have to do further work in gpu::PreprocessColumnData
-      // to know how big this buffer actually is.
+      // if this has a list parent, we have to get column sizes from the
+      // data computed during gpu::ComputePageSizes
       if (out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) {
         has_lists = true;
       }
@@ -789,25 +1464,63 @@ void reader::impl::allocate_columns(size_t min_row, size_t total_rows, bool uses
       else if (out_buf.size == 0) {
         // add 1 for the offset if this is a list column
         out_buf.create(
-          out_buf.type.id() == type_id::LIST && l_idx < max_depth ? total_rows + 1 : total_rows,
+          out_buf.type.id() == type_id::LIST && l_idx < max_depth ? num_rows + 1 : num_rows,
           _stream,
           _mr);
       }
     }
   }
 
-  // if we have columns containing lists, further preprocessing is necessary.
+  // compute output column sizes by examining the pages of the -input- columns
   if (has_lists) {
-    gpu::PreprocessColumnData(pages,
-                              chunks,
-                              _input_columns,
-                              _output_buffers,
-                              total_rows,
-                              min_row,
-                              uses_custom_row_bounds,
-                              _stream,
-                              _mr);
-    _stream.synchronize();
+    auto& page_keys  = _chunk_itm_data.page_keys;
+    auto& page_index = _chunk_itm_data.page_index;
+    for (size_t idx = 0; idx < _input_columns.size(); idx++) {
+      auto const& input_col = _input_columns[idx];
+      auto src_col_schema   = input_col.schema_idx;
+      size_t max_depth      = input_col.nesting_depth();
+
+      auto* cols = &_output_buffers;
+      for (size_t l_idx = 0; l_idx < input_col.nesting_depth(); l_idx++) {
+        auto& out_buf = (*cols)[input_col.nesting[l_idx]];
+        cols          = &out_buf.children;
+
+        // size iterator. indexes pages by sorted order
+        auto size_input = thrust::make_transform_iterator(
+          page_index.begin(),
+          get_page_nesting_size{src_col_schema, static_cast<size_type>(l_idx), pages.device_ptr()});
+
+        // if this buffer is part of a list hierarchy, we need to determine its
+        // final size and allocate it here.
+        //
+        // for struct columns, higher levels of the output columns are shared between input
+        // columns. so don't compute any given level more than once.
+        if ((out_buf.user_data & PARQUET_COLUMN_BUFFER_FLAG_HAS_LIST_PARENT) && out_buf.size == 0) {
+          int size =
+            thrust::reduce(rmm::exec_policy(_stream), size_input, size_input + pages.size());
+
+          // if this is a list column add 1 for non-leaf levels for the terminating offset
+          if (out_buf.type.id() == type_id::LIST && l_idx < max_depth) { size++; }
+
+          // allocate
+          out_buf.create(size, _stream, _mr);
+        }
+
+        // for nested hierarchies, compute per-page start offset
+        if (input_col.has_repetition) {
+          thrust::exclusive_scan_by_key(
+            rmm::exec_policy(_stream),
+            page_keys.begin(),
+            page_keys.end(),
+            size_input,
+            start_offset_output_iterator{pages.device_ptr(),
+                                         page_index.begin(),
+                                         0,
+                                         static_cast<int>(src_col_schema),
+                                         static_cast<int>(l_idx)});
+        }
+      }
+    }
   }
 }
 
diff --git a/cpp/src/io/utilities/column_buffer.cpp b/cpp/src/io/utilities/column_buffer.cpp
index de145486662..89ba5c598e8 100644
--- a/cpp/src/io/utilities/column_buffer.cpp
+++ b/cpp/src/io/utilities/column_buffer.cpp
@@ -55,6 +55,33 @@ void column_buffer::create(size_type _size,
   }
 }
 
+namespace {
+
+/**
+ * @brief Recursively copy `name` and `user_data` fields of one buffer to another.
+ *
+ * @param buff The old output buffer
+ * @param new_buff The new output buffer
+ */
+void copy_buffer_data(column_buffer const& buff, column_buffer& new_buff)
+{
+  new_buff.name      = buff.name;
+  new_buff.user_data = buff.user_data;
+  for (auto const& child : buff.children) {
+    auto& new_child = new_buff.children.emplace_back(column_buffer(child.type, child.is_nullable));
+    copy_buffer_data(child, new_child);
+  }
+}
+
+}  // namespace
+
+column_buffer column_buffer::empty_like(column_buffer const& input)
+{
+  auto new_buff = column_buffer(input.type, input.is_nullable);
+  copy_buffer_data(input, new_buff);
+  return new_buff;
+}
+
 /**
  * @copydoc cudf::io::detail::make_column
  */
diff --git a/cpp/src/io/utilities/column_buffer.hpp b/cpp/src/io/utilities/column_buffer.hpp
index 8ae3d39a3ba..8f181157fae 100644
--- a/cpp/src/io/utilities/column_buffer.hpp
+++ b/cpp/src/io/utilities/column_buffer.hpp
@@ -104,10 +104,14 @@ struct column_buffer {
   {
     return static_cast<T*>(_null_mask.data());
   }
-  auto null_mask_size() { return _null_mask.size(); };
+  auto null_mask_size() { return _null_mask.size(); }
 
   auto& null_count() { return _null_count; }
 
+  // Create a new column_buffer that has empty data but with the same basic information as the
+  // input column, including same type, nullability, name, and user_data.
+  static column_buffer empty_like(column_buffer const& input);
+
   std::unique_ptr<rmm::device_uvector<string_index_pair>> _strings;
   rmm::device_buffer _data{};
   rmm::device_buffer _null_mask{};
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index c602ccc7374..bdf74368ffe 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -223,7 +223,7 @@ ConfigureTest(DECOMPRESSION_TEST io/comp/decomp_test.cpp)
 ConfigureTest(CSV_TEST io/csv_test.cpp)
 ConfigureTest(FILE_IO_TEST io/file_io_test.cpp)
 ConfigureTest(ORC_TEST io/orc_test.cpp)
-ConfigureTest(PARQUET_TEST io/parquet_test.cpp)
+ConfigureTest(PARQUET_TEST io/parquet_test.cpp io/parquet_chunked_reader_test.cpp)
 ConfigureTest(JSON_TEST io/json_test.cpp io/json_chunked_reader.cpp)
 ConfigureTest(JSON_TYPE_CAST_TEST io/json_type_cast_test.cu)
 ConfigureTest(NESTED_JSON_TEST io/nested_json_test.cpp io/json_tree.cpp)
diff --git a/cpp/tests/io/parquet_chunked_reader_test.cpp b/cpp/tests/io/parquet_chunked_reader_test.cpp
new file mode 100644
index 00000000000..76a65857e6f
--- /dev/null
+++ b/cpp/tests/io/parquet_chunked_reader_test.cpp
@@ -0,0 +1,887 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/column_utilities.hpp>
+#include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/cudf_gtest.hpp>
+#include <cudf_test/io_metadata_utilities.hpp>
+#include <cudf_test/iterator_utilities.hpp>
+#include <cudf_test/table_utilities.hpp>
+#include <cudf_test/type_lists.hpp>
+
+#include <cudf/column/column.hpp>
+#include <cudf/column/column_factories.hpp>
+#include <cudf/concatenate.hpp>
+#include <cudf/copying.hpp>
+#include <cudf/detail/iterator.cuh>
+#include <cudf/detail/structs/utilities.hpp>
+#include <cudf/fixed_point/fixed_point.hpp>
+#include <cudf/io/data_sink.hpp>
+#include <cudf/io/datasource.hpp>
+#include <cudf/io/parquet.hpp>
+#include <cudf/strings/strings_column_view.hpp>
+#include <cudf/table/table.hpp>
+#include <cudf/table/table_view.hpp>
+#include <cudf/utilities/span.hpp>
+
+#include <src/io/parquet/compact_protocol_reader.hpp>
+#include <src/io/parquet/parquet.hpp>
+
+#include <thrust/iterator/counting_iterator.h>
+
+#include <rmm/cuda_stream_view.hpp>
+
+#include <fstream>
+#include <type_traits>
+
+namespace {
+// Global environment for temporary files
+auto const temp_env = static_cast<cudf::test::TempDirTestEnvironment*>(
+  ::testing::AddGlobalTestEnvironment(new cudf::test::TempDirTestEnvironment));
+
+using int32s_col       = cudf::test::fixed_width_column_wrapper<int32_t>;
+using int64s_col       = cudf::test::fixed_width_column_wrapper<int64_t>;
+using strings_col      = cudf::test::strings_column_wrapper;
+using structs_col      = cudf::test::structs_column_wrapper;
+using int32s_lists_col = cudf::test::lists_column_wrapper<int32_t>;
+
+auto write_file(std::vector<std::unique_ptr<cudf::column>>& input_columns,
+                std::string const& filename,
+                bool nullable,
+                std::size_t max_page_size_bytes = cudf::io::default_max_page_size_bytes,
+                std::size_t max_page_size_rows  = cudf::io::default_max_page_size_rows)
+{
+  // Just shift nulls of the next column by one position to avoid having all nulls in the same
+  // table rows.
+  if (nullable) {
+    // Generate deterministic bitmask instead of random bitmask for easy computation of data size.
+    auto const valid_iter = cudf::detail::make_counting_transform_iterator(
+      0, [](cudf::size_type i) { return i % 4 != 3; });
+
+    cudf::size_type offset{0};
+    for (auto& col : input_columns) {
+      auto const col_typeid = col->type().id();
+      col->set_null_mask(
+        cudf::test::detail::make_null_mask(valid_iter + offset, valid_iter + col->size() + offset));
+
+      if (col_typeid == cudf::type_id::STRUCT) {
+        auto const null_mask  = col->view().null_mask();
+        auto const null_count = col->null_count();
+
+        for (cudf::size_type idx = 0; idx < col->num_children(); ++idx) {
+          cudf::structs::detail::superimpose_parent_nulls(null_mask,
+                                                          null_count,
+                                                          col->child(idx),
+                                                          cudf::get_default_stream(),
+                                                          rmm::mr::get_current_device_resource());
+        }
+      }
+
+      if (col_typeid == cudf::type_id::LIST || col_typeid == cudf::type_id::STRUCT ||
+          col_typeid == cudf::type_id::STRING) {
+        col = cudf::purge_nonempty_nulls(col->view());
+      }
+    }
+  }
+
+  auto input_table = std::make_unique<cudf::table>(std::move(input_columns));
+  auto filepath =
+    temp_env->get_temp_filepath(nullable ? filename + "_nullable.parquet" : filename + ".parquet");
+
+  auto const write_opts =
+    cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, *input_table)
+      .max_page_size_bytes(max_page_size_bytes)
+      .max_page_size_rows(max_page_size_rows)
+      .build();
+  cudf::io::write_parquet(write_opts);
+
+  return std::pair{std::move(input_table), std::move(filepath)};
+}
+
+auto chunked_read(std::string const& filepath, std::size_t byte_limit)
+{
+  auto const read_opts =
+    cudf::io::parquet_reader_options::builder(cudf::io::source_info{filepath}).build();
+  auto reader = cudf::io::chunked_parquet_reader(byte_limit, read_opts);
+
+  auto num_chunks = 0;
+  auto out_tables = std::vector<std::unique_ptr<cudf::table>>{};
+
+  do {
+    auto chunk = reader.read_chunk();
+    // If the input file is empty, the first call to `read_chunk` will return an empty table.
+    // Thus, we only check for non-empty output table from the second call.
+    if (num_chunks > 0) {
+      CUDF_EXPECTS(chunk.tbl->num_rows() != 0, "Number of rows in the new chunk is zero.");
+    }
+    ++num_chunks;
+    out_tables.emplace_back(std::move(chunk.tbl));
+  } while (reader.has_next());
+
+  auto out_tviews = std::vector<cudf::table_view>{};
+  for (auto const& tbl : out_tables) {
+    out_tviews.emplace_back(tbl->view());
+  }
+
+  return std::pair(cudf::concatenate(out_tviews), num_chunks);
+}
+
+}  // namespace
+
+struct ParquetChunkedReaderTest : public cudf::test::BaseFixture {
+};
+
+TEST_F(ParquetChunkedReaderTest, TestChunkedReadNoData)
+{
+  std::vector<std::unique_ptr<cudf::column>> input_columns;
+  input_columns.emplace_back(int32s_col{}.release());
+  input_columns.emplace_back(int64s_col{}.release());
+
+  auto const [expected, filepath] = write_file(input_columns, "chunked_read_empty", false);
+  auto const [result, num_chunks] = chunked_read(filepath, 1'000);
+  EXPECT_EQ(num_chunks, 1);
+  EXPECT_EQ(result->num_rows(), 0);
+  EXPECT_EQ(result->num_columns(), 2);
+  CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result);
+}
+
+TEST_F(ParquetChunkedReaderTest, TestChunkedReadSimpleData)
+{
+  auto constexpr num_rows = 40'000;
+
+  auto const generate_input = [num_rows](bool nullable) {
+    std::vector<std::unique_ptr<cudf::column>> input_columns;
+    auto const value_iter = thrust::make_counting_iterator(0);
+    input_columns.emplace_back(int32s_col(value_iter, value_iter + num_rows).release());
+    input_columns.emplace_back(int64s_col(value_iter, value_iter + num_rows).release());
+
+    return write_file(input_columns, "chunked_read_simple", nullable);
+  };
+
+  {
+    auto const [expected, filepath] = generate_input(false);
+    auto const [result, num_chunks] = chunked_read(filepath, 240'000);
+    EXPECT_EQ(num_chunks, 2);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result);
+  }
+
+  {
+    auto const [expected, filepath] = generate_input(true);
+    auto const [result, num_chunks] = chunked_read(filepath, 240'000);
+    EXPECT_EQ(num_chunks, 2);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result);
+  }
+}
+
+TEST_F(ParquetChunkedReaderTest, TestChunkedReadBoundaryCases)
+{
+  // Tests some specific boundary conditions in the split calculations.
+
+  auto constexpr num_rows = 40'000;
+
+  auto const [expected, filepath] = [num_rows]() {
+    std::vector<std::unique_ptr<cudf::column>> input_columns;
+    auto const value_iter = thrust::make_counting_iterator(0);
+    input_columns.emplace_back(int32s_col(value_iter, value_iter + num_rows).release());
+    return write_file(input_columns, "chunked_read_simple_boundary", false /*nullable*/);
+  }();
+
+  // Test with zero limit: everything will be read in one chunk
+  {
+    auto const [result, num_chunks] = chunked_read(filepath, 0);
+    EXPECT_EQ(num_chunks, 1);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result);
+  }
+
+  // Test with a very small limit: 1 byte
+  {
+    auto const [result, num_chunks] = chunked_read(filepath, 1);
+    EXPECT_EQ(num_chunks, 2);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result);
+  }
+
+  // Test with a very large limit
+  {
+    auto const [result, num_chunks] = chunked_read(filepath, 2L << 40);
+    EXPECT_EQ(num_chunks, 1);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result);
+  }
+
+  // Test with a limit slightly less than one page of data
+  {
+    auto const [result, num_chunks] = chunked_read(filepath, 79'000);
+    EXPECT_EQ(num_chunks, 2);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result);
+  }
+
+  // Test with a limit exactly the size of one page of data
+  {
+    auto const [result, num_chunks] = chunked_read(filepath, 80'000);
+    EXPECT_EQ(num_chunks, 2);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result);
+  }
+
+  // Test with a limit slightly more than the size of one page of data
+  {
+    auto const [result, num_chunks] = chunked_read(filepath, 81'000);
+    EXPECT_EQ(num_chunks, 2);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result);
+  }
+
+  // Test with a limit slightly less than two pages of data
+  {
+    auto const [result, num_chunks] = chunked_read(filepath, 159'000);
+    EXPECT_EQ(num_chunks, 2);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result);
+  }
+
+  // Test with a limit exactly the size of two pages of data minus one byte
+  {
+    auto const [result, num_chunks] = chunked_read(filepath, 159'999);
+    EXPECT_EQ(num_chunks, 2);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result);
+  }
+
+  // Test with a limit exactly the size of two pages of data
+  {
+    auto const [result, num_chunks] = chunked_read(filepath, 160'000);
+    EXPECT_EQ(num_chunks, 1);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result);
+  }
+
+  // Test with a limit slightly more than the size of two pages of data
+  {
+    auto const [result, num_chunks] = chunked_read(filepath, 161'000);
+    EXPECT_EQ(num_chunks, 1);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result);
+  }
+}
+
+TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithString)
+{
+  auto constexpr num_rows = 60'000;
+
+  auto const generate_input = [num_rows](bool nullable) {
+    std::vector<std::unique_ptr<cudf::column>> input_columns;
+    auto const value_iter = thrust::make_counting_iterator(0);
+
+    // ints                                            Page    total bytes   cumulative bytes
+    // 20000 rows of 4 bytes each                    = A0      80000         80000
+    // 20000 rows of 4 bytes each                    = A1      80000         160000
+    // 20000 rows of 4 bytes each                    = A2      80000         240000
+    input_columns.emplace_back(int32s_col(value_iter, value_iter + num_rows).release());
+
+    // strings                                         Page    total bytes   cumulative bytes
+    // 20000 rows of 1 char each    (20000  + 80004) = B0      100004        100004
+    // 20000 rows of 4 chars each   (80000  + 80004) = B1      160004        260008
+    // 20000 rows of 16 chars each  (320000 + 80004) = B2      400004        660012
+    auto const strings  = std::vector<std::string>{"a", "bbbb", "cccccccccccccccc"};
+    auto const str_iter = cudf::detail::make_counting_transform_iterator(0, [&](int32_t i) {
+      if (i < 20000) { return strings[0]; }
+      if (i < 40000) { return strings[1]; }
+      return strings[2];
+    });
+    input_columns.emplace_back(strings_col(str_iter, str_iter + num_rows).release());
+
+    // Cumulative sizes:
+    // A0 + B0 :  180004
+    // A1 + B1 :  420008
+    // A2 + B2 :  900012
+    //                                    skip_rows / num_rows
+    // byte_limit==500000  should give 2 chunks: {0, 40000}, {40000, 20000}
+    // byte_limit==1000000 should give 1 chunk:  {0, 60000}
+    return write_file(input_columns,
+                      "chunked_read_with_strings",
+                      nullable,
+                      512 * 1024,  // 512KB per page
+                      20000        // 20k rows per page
+    );
+  };
+
+  auto const [expected_no_null, filepath_no_null]       = generate_input(false);
+  auto const [expected_with_nulls, filepath_with_nulls] = generate_input(true);
+
+  // Test with zero limit: everything will be read in one chunk
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_no_null, 0);
+    EXPECT_EQ(num_chunks, 1);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result);
+  }
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 0);
+    EXPECT_EQ(num_chunks, 1);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result);
+  }
+
+  // Test with a very small limit: 1 byte
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_no_null, 1);
+    EXPECT_EQ(num_chunks, 3);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result);
+  }
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1);
+    EXPECT_EQ(num_chunks, 3);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result);
+  }
+
+  // Test with a very large limit
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_no_null, 2L << 40);
+    EXPECT_EQ(num_chunks, 1);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result);
+  }
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 2L << 40);
+    EXPECT_EQ(num_chunks, 1);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result);
+  }
+
+  // Other tests:
+
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_no_null, 500'000);
+    EXPECT_EQ(num_chunks, 2);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result);
+  }
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 500'000);
+    EXPECT_EQ(num_chunks, 2);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result);
+  }
+
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_no_null, 1'000'000);
+    EXPECT_EQ(num_chunks, 1);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result);
+  }
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1'000'000);
+    EXPECT_EQ(num_chunks, 1);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result);
+  }
+}
+
+TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithStructs)
+{
+  auto constexpr num_rows = 100'000;
+
+  auto const generate_input = [num_rows](bool nullable) {
+    std::vector<std::unique_ptr<cudf::column>> input_columns;
+    auto const int_iter = thrust::make_counting_iterator(0);
+    input_columns.emplace_back(int32s_col(int_iter, int_iter + num_rows).release());
+    input_columns.emplace_back([=] {
+      auto child1 = int32s_col(int_iter, int_iter + num_rows);
+      auto child2 = int32s_col(int_iter + num_rows, int_iter + num_rows * 2);
+
+      auto const str_iter = cudf::detail::make_counting_transform_iterator(
+        0, [&](int32_t i) { return std::to_string(i); });
+      auto child3 = strings_col{str_iter, str_iter + num_rows};
+
+      return structs_col{{child1, child2, child3}}.release();
+    }());
+
+    return write_file(input_columns,
+                      "chunked_read_with_structs",
+                      nullable,
+                      512 * 1024,  // 512KB per page
+                      20000        // 20k rows per page
+    );
+  };
+
+  auto const [expected_no_null, filepath_no_null]       = generate_input(false);
+  auto const [expected_with_nulls, filepath_with_nulls] = generate_input(true);
+
+  // Test with zero limit: everything will be read in one chunk
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_no_null, 0);
+    EXPECT_EQ(num_chunks, 1);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result);
+  }
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 0);
+    EXPECT_EQ(num_chunks, 1);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result);
+  }
+
+  // Test with a very small limit: 1 byte
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_no_null, 1);
+    EXPECT_EQ(num_chunks, 5);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result);
+  }
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1);
+    EXPECT_EQ(num_chunks, 5);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result);
+  }
+
+  // Test with a very large limit
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_no_null, 2L << 40);
+    EXPECT_EQ(num_chunks, 1);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result);
+  }
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 2L << 40);
+    EXPECT_EQ(num_chunks, 1);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result);
+  }
+
+  // Other tests:
+
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_no_null, 500'000);
+    EXPECT_EQ(num_chunks, 5);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result);
+  }
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 500'000);
+    EXPECT_EQ(num_chunks, 5);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result);
+  }
+}
+
+TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithListsNoNulls)
+{
+  auto constexpr num_rows = 100'000;
+
+  auto const [expected, filepath] = [num_rows]() {
+    std::vector<std::unique_ptr<cudf::column>> input_columns;
+    // 20000 rows in 1 page consist of:
+    //
+    // 20001 offsets :   80004  bytes
+    // 30000 ints    :   120000 bytes
+    // total         :   200004 bytes
+    auto const template_lists = int32s_lists_col{
+      int32s_lists_col{}, int32s_lists_col{0}, int32s_lists_col{1, 2}, int32s_lists_col{3, 4, 5}};
+
+    auto const gather_iter =
+      cudf::detail::make_counting_transform_iterator(0, [&](int32_t i) { return i % 4; });
+    auto const gather_map = int32s_col(gather_iter, gather_iter + num_rows);
+    input_columns.emplace_back(
+      std::move(cudf::gather(cudf::table_view{{template_lists}}, gather_map)->release().front()));
+
+    return write_file(input_columns,
+                      "chunked_read_with_lists_no_null",
+                      false /*nullable*/,
+                      512 * 1024,  // 512KB per page
+                      20000        // 20k rows per page
+    );
+  }();
+
+  // Test with zero limit: everything will be read in one chunk
+  {
+    auto const [result, num_chunks] = chunked_read(filepath, 0);
+    EXPECT_EQ(num_chunks, 1);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result);
+  }
+
+  // Test with a very small limit: 1 byte
+  {
+    auto const [result, num_chunks] = chunked_read(filepath, 1);
+    EXPECT_EQ(num_chunks, 5);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result);
+  }
+
+  // Test with a very large limit
+  {
+    auto const [result, num_chunks] = chunked_read(filepath, 2L << 40);
+    EXPECT_EQ(num_chunks, 1);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result);
+  }
+
+  // chunk size slightly less than 1 page (forcing it to be at least 1 page per read)
+  {
+    auto const [result, num_chunks] = chunked_read(filepath, 200'000);
+    EXPECT_EQ(num_chunks, 5);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result);
+  }
+
+  // chunk size exactly 1 page
+  {
+    auto const [result, num_chunks] = chunked_read(filepath, 200'004);
+    EXPECT_EQ(num_chunks, 5);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result);
+  }
+
+  // chunk size 2 pages. 3 chunks (2 pages + 2 pages + 1 page)
+  {
+    auto const [result, num_chunks] = chunked_read(filepath, 400'008);
+    EXPECT_EQ(num_chunks, 3);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result);
+  }
+
+  // chunk size 2 pages minus one byte: each chunk will be just one page
+  {
+    auto const [result, num_chunks] = chunked_read(filepath, 400'007);
+    EXPECT_EQ(num_chunks, 5);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result);
+  }
+}
+
+TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithListsHavingNulls)
+{
+  auto constexpr num_rows = 100'000;
+
+  auto const [expected, filepath] = [num_rows]() {
+    std::vector<std::unique_ptr<cudf::column>> input_columns;
+    // 20000 rows in 1 page consist of:
+    //
+    // 625 validity words :   2500 bytes   (a null every 4 rows: null at indices [3, 7, 11, ...])
+    // 20001 offsets      :   80004  bytes
+    // 15000 ints         :   60000 bytes
+    // total              :   142504 bytes
+    auto const template_lists =
+      int32s_lists_col{// these will all be null
+                       int32s_lists_col{},
+                       int32s_lists_col{0},
+                       int32s_lists_col{1, 2},
+                       int32s_lists_col{3, 4, 5, 6, 7, 8, 9} /* this list will be nullified out */};
+    auto const gather_iter =
+      cudf::detail::make_counting_transform_iterator(0, [&](int32_t i) { return i % 4; });
+    auto const gather_map = int32s_col(gather_iter, gather_iter + num_rows);
+    input_columns.emplace_back(
+      std::move(cudf::gather(cudf::table_view{{template_lists}}, gather_map)->release().front()));
+
+    return write_file(input_columns,
+                      "chunked_read_with_lists_nulls",
+                      true /*nullable*/,
+                      512 * 1024,  // 512KB per page
+                      20000        // 20k rows per page
+    );
+  }();
+
+  // Test with zero limit: everything will be read in one chunk
+  {
+    auto const [result, num_chunks] = chunked_read(filepath, 0);
+    EXPECT_EQ(num_chunks, 1);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result);
+  }
+
+  // Test with a very small limit: 1 byte
+  {
+    auto const [result, num_chunks] = chunked_read(filepath, 1);
+    EXPECT_EQ(num_chunks, 5);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result);
+  }
+
+  // Test with a very large limit
+  {
+    auto const [result, num_chunks] = chunked_read(filepath, 2L << 40);
+    EXPECT_EQ(num_chunks, 1);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result);
+  }
+
+  // chunk size slightly less than 1 page (forcing it to be at least 1 page per read)
+  {
+    auto const [result, num_chunks] = chunked_read(filepath, 142'500);
+    EXPECT_EQ(num_chunks, 5);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result);
+  }
+
+  // chunk size exactly 1 page
+  {
+    auto const [result, num_chunks] = chunked_read(filepath, 142'504);
+    EXPECT_EQ(num_chunks, 5);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result);
+  }
+
+  // chunk size 2 pages. 3 chunks (2 pages + 2 pages + 1 page)
+  {
+    auto const [result, num_chunks] = chunked_read(filepath, 285'008);
+    EXPECT_EQ(num_chunks, 3);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result);
+  }
+
+  // chunk size 2 pages minus 1 byte: each chunk will be just one page
+  {
+    auto const [result, num_chunks] = chunked_read(filepath, 285'007);
+    EXPECT_EQ(num_chunks, 5);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *result);
+  }
+}
+
+TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithStructsOfLists)
+{
+  auto constexpr num_rows = 100'000;
+
+  auto const generate_input = [num_rows](bool nullable) {
+    std::vector<std::unique_ptr<cudf::column>> input_columns;
+    auto const int_iter = thrust::make_counting_iterator(0);
+    input_columns.emplace_back(int32s_col(int_iter, int_iter + num_rows).release());
+    input_columns.emplace_back([=] {
+      std::vector<std::unique_ptr<cudf::column>> child_columns;
+      child_columns.emplace_back(int32s_col(int_iter, int_iter + num_rows).release());
+      child_columns.emplace_back(
+        int32s_col(int_iter + num_rows, int_iter + num_rows * 2).release());
+
+      auto const str_iter = cudf::detail::make_counting_transform_iterator(0, [&](int32_t i) {
+        return std::to_string(i) + "++++++++++++++++++++" + std::to_string(i);
+      });
+      child_columns.emplace_back(strings_col{str_iter, str_iter + num_rows}.release());
+
+      auto const template_lists = int32s_lists_col{
+        int32s_lists_col{}, int32s_lists_col{0}, int32s_lists_col{0, 1}, int32s_lists_col{0, 1, 2}};
+      auto const gather_iter =
+        cudf::detail::make_counting_transform_iterator(0, [&](int32_t i) { return i % 4; });
+      auto const gather_map = int32s_col(gather_iter, gather_iter + num_rows);
+      child_columns.emplace_back(
+        std::move(cudf::gather(cudf::table_view{{template_lists}}, gather_map)->release().front()));
+
+      return structs_col(std::move(child_columns)).release();
+    }());
+
+    return write_file(input_columns,
+                      "chunked_read_with_structs_of_lists",
+                      nullable,
+                      512 * 1024,  // 512KB per page
+                      20000        // 20k rows per page
+    );
+  };
+
+  auto const [expected_no_null, filepath_no_null]       = generate_input(false);
+  auto const [expected_with_nulls, filepath_with_nulls] = generate_input(true);
+
+  // Test with zero limit: everything will be read in one chunk
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_no_null, 0);
+    EXPECT_EQ(num_chunks, 1);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result);
+  }
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 0);
+    EXPECT_EQ(num_chunks, 1);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result);
+  }
+
+  // Test with a very small limit: 1 byte
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_no_null, 1);
+    EXPECT_EQ(num_chunks, 10);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result);
+  }
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1);
+    EXPECT_EQ(num_chunks, 5);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result);
+  }
+
+  // Test with a very large limit
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_no_null, 2L << 40);
+    EXPECT_EQ(num_chunks, 1);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result);
+  }
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 2L << 40);
+    EXPECT_EQ(num_chunks, 1);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result);
+  }
+
+  // Other tests:
+
+  // for these tests, different columns get written to different numbers of pages so it's a
+  // little tricky to describe the expected results by page counts. To get an idea of how
+  // these values are chosen, see the debug output from the call to print_cumulative_row_info() in
+  // reader_impl_preprocess.cu -> find_splits()
+
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_no_null, 1'000'000);
+    EXPECT_EQ(num_chunks, 7);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result);
+  }
+
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_no_null, 1'500'000);
+    EXPECT_EQ(num_chunks, 4);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result);
+  }
+
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_no_null, 2'000'000);
+    EXPECT_EQ(num_chunks, 4);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result);
+  }
+
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_no_null, 5'000'000);
+    EXPECT_EQ(num_chunks, 2);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result);
+  }
+
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1'000'000);
+    EXPECT_EQ(num_chunks, 5);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result);
+  }
+
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1'500'000);
+    EXPECT_EQ(num_chunks, 5);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result);
+  }
+
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 2'000'000);
+    EXPECT_EQ(num_chunks, 3);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result);
+  }
+
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 5'000'000);
+    EXPECT_EQ(num_chunks, 1);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result);
+  }
+}
+
+TEST_F(ParquetChunkedReaderTest, TestChunkedReadWithListsOfStructs)
+{
+  auto constexpr num_rows = 100'000;
+
+  auto const generate_input = [num_rows](bool nullable) {
+    std::vector<std::unique_ptr<cudf::column>> input_columns;
+    auto const int_iter = thrust::make_counting_iterator(0);
+    input_columns.emplace_back(int32s_col(int_iter, int_iter + num_rows).release());
+
+    auto offsets = std::vector<cudf::size_type>{};
+    offsets.reserve(num_rows * 2);
+    cudf::size_type num_structs = 0;
+    for (int i = 0; i < num_rows; ++i) {
+      offsets.push_back(num_structs);
+      auto const new_list_size = i % 4;
+      num_structs += new_list_size;
+    }
+    offsets.push_back(num_structs);
+
+    auto const make_structs_col = [=] {
+      auto child1 = int32s_col(int_iter, int_iter + num_structs);
+      auto child2 = int32s_col(int_iter + num_structs, int_iter + num_structs * 2);
+
+      auto const str_iter = cudf::detail::make_counting_transform_iterator(
+        0, [&](int32_t i) { return std::to_string(i) + std::to_string(i) + std::to_string(i); });
+      auto child3 = strings_col{str_iter, str_iter + num_structs};
+
+      return structs_col{{child1, child2, child3}}.release();
+    };
+
+    input_columns.emplace_back(
+      cudf::make_lists_column(static_cast<cudf::size_type>(offsets.size() - 1),
+                              int32s_col(offsets.begin(), offsets.end()).release(),
+                              make_structs_col(),
+                              0,
+                              rmm::device_buffer{}));
+
+    return write_file(input_columns,
+                      "chunked_read_with_lists_of_structs",
+                      nullable,
+                      512 * 1024,  // 512KB per page
+                      20000        // 20k rows per page
+    );
+  };
+
+  auto const [expected_no_null, filepath_no_null]       = generate_input(false);
+  auto const [expected_with_nulls, filepath_with_nulls] = generate_input(true);
+
+  // Test with zero limit: everything will be read in one chunk
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_no_null, 0);
+    EXPECT_EQ(num_chunks, 1);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result);
+  }
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 0);
+    EXPECT_EQ(num_chunks, 1);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result);
+  }
+
+  // Test with a very small limit: 1 byte
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_no_null, 1);
+    EXPECT_EQ(num_chunks, 10);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result);
+  }
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1);
+    EXPECT_EQ(num_chunks, 5);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result);
+  }
+
+  // Test with a very large limit
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_no_null, 2L << 40);
+    EXPECT_EQ(num_chunks, 1);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result);
+  }
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 2L << 40);
+    EXPECT_EQ(num_chunks, 1);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result);
+  }
+
+  // for these tests, different columns get written to different numbers of pages so it's a
+  // little tricky to describe the expected results by page counts. To get an idea of how
+  // these values are chosen, see the debug output from the call to print_cumulative_row_info() in
+  // reader_impl_preprocess.cu -> find_splits()
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_no_null, 1'000'000);
+    EXPECT_EQ(num_chunks, 7);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result);
+  }
+
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_no_null, 1'500'000);
+    EXPECT_EQ(num_chunks, 4);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result);
+  }
+
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_no_null, 2'000'000);
+    EXPECT_EQ(num_chunks, 4);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result);
+  }
+
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_no_null, 5'000'000);
+    EXPECT_EQ(num_chunks, 2);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_no_null, *result);
+  }
+
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1'000'000);
+    EXPECT_EQ(num_chunks, 5);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result);
+  }
+
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 1'500'000);
+    EXPECT_EQ(num_chunks, 4);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result);
+  }
+
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 2'000'000);
+    EXPECT_EQ(num_chunks, 3);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result);
+  }
+
+  {
+    auto const [result, num_chunks] = chunked_read(filepath_with_nulls, 5'000'000);
+    EXPECT_EQ(num_chunks, 1);
+    CUDF_TEST_EXPECT_TABLES_EQUAL(*expected_with_nulls, *result);
+  }
+}

From 6d2a4f07d3cd22ae72393b03f86a01b32de82ed1 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Thu, 17 Nov 2022 18:40:22 -0800
Subject: [PATCH 189/202] Add wheel builds (#12096)

This PR enables building wheels. It mostly leverages various build options that have already been added to the repository.

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Sevag H (https://github.com/sevagh)
  - Paul Taylor (https://github.com/trxcllnt)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Sevag H (https://github.com/sevagh)

URL: https://github.com/rapidsai/cudf/pull/12096
---
 .github/workflows/wheels.yml                 |  77 ++++++++++
 .gitignore                                   |   3 +
 cpp/CMakeLists.txt                           | 150 +++++++++++--------
 python/cudf/CMakeLists.txt                   |  32 +++-
 python/cudf/LICENSE                          |   1 +
 python/cudf/_custom_build/backend.py         |  37 +++++
 python/cudf/cmake/Modules/WheelHelpers.cmake |  71 +++++++++
 python/cudf/cudf/_lib/CMakeLists.txt         |  19 ++-
 python/cudf/pyproject.toml                   |   6 +
 python/cudf/setup.py                         |  65 +++-----
 python/dask_cudf/LICENSE                     |   1 +
 python/dask_cudf/setup.py                    |  64 ++------
 12 files changed, 364 insertions(+), 162 deletions(-)
 create mode 100644 .github/workflows/wheels.yml
 create mode 120000 python/cudf/LICENSE
 create mode 100644 python/cudf/_custom_build/backend.py
 create mode 100644 python/cudf/cmake/Modules/WheelHelpers.cmake
 create mode 120000 python/dask_cudf/LICENSE

diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
new file mode 100644
index 00000000000..7f1c708c9a7
--- /dev/null
+++ b/.github/workflows/wheels.yml
@@ -0,0 +1,77 @@
+name: cuDF wheels
+
+on:
+  workflow_call:
+    inputs:
+      versioneer-override:
+        type: string
+        default: ''
+      build-tag:
+        type: string
+        default: ''
+      branch:
+        required: true
+        type: string
+      date:
+        required: true
+        type: string
+      sha:
+        required: true
+        type: string
+      build-type:
+        type: string
+        default: nightly
+
+concurrency:
+  group: "cudf-${{ github.workflow }}-${{ github.ref }}"
+  cancel-in-progress: true
+
+jobs:
+  cudf-wheels:
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-manylinux.yml@main
+    with:
+      repo: rapidsai/cudf
+
+      build-type: ${{ inputs.build-type }}
+      branch: ${{ inputs.branch }}
+      sha: ${{ inputs.sha }}
+      date: ${{ inputs.date }}
+
+      package-dir: python/cudf
+      package-name: cudf
+
+      python-package-versioneer-override: ${{ inputs.versioneer-override }}
+      python-package-build-tag: ${{ inputs.build-tag }}
+
+      skbuild-configure-options: "-DCUDF_BUILD_WHEELS=ON -DDETECT_CONDA_ENV=OFF"
+
+      test-extras:  test
+
+      # Have to manually specify the cupy install location on arm.
+      # Also have to manually install tokenizers==0.10.2, which is the last tokenizers
+      # release to have a binary aarch64 wheel available on PyPI.
+      # Otherwise, the tokenizers sdist is used, which needs a Rust compiler.
+      test-before-arm64: "pip install tokenizers==0.10.2 cupy-cuda11x -f https://pip.cupy.dev/aarch64"
+
+      test-unittest: "pytest -v -n 8 ./python/cudf/cudf/tests"
+    secrets: inherit
+  dask_cudf-wheel:
+    needs: cudf-wheels
+    uses: rapidsai/shared-action-workflows/.github/workflows/wheels-pure.yml@main
+    with:
+      repo: rapidsai/cudf
+
+      build-type: ${{ inputs.build-type }}
+      branch: ${{ inputs.branch }}
+      sha: ${{ inputs.sha }}
+      date: ${{ inputs.date }}
+
+      package-dir: python/dask_cudf
+      package-name: dask_cudf
+
+      python-package-versioneer-override: ${{ inputs.versioneer-override }}
+      python-package-build-tag: ${{ inputs.build-tag }}
+
+      test-extras:  test
+      test-unittest: "pytest -v -n 8 ./python/dask_cudf/dask_cudf/tests"
+    secrets: inherit
diff --git a/.gitignore b/.gitignore
index 91a7ecc49f7..1867e65b7be 100644
--- a/.gitignore
+++ b/.gitignore
@@ -167,3 +167,6 @@ docs/cudf/source/api_docs/generated/*
 docs/cudf/source/api_docs/api/*
 docs/cudf/source/user_guide/example_output/*
 docs/cudf/source/user_guide/cudf.*Dtype.*.rst
+
+# cibuildwheel
+/wheelhouse
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index c52248c1eab..6f4f42f6842 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -48,6 +48,8 @@ option(BUILD_TESTS "Configure CMake to build tests" ON)
 option(BUILD_BENCHMARKS "Configure CMake to build (google & nvbench) benchmarks" OFF)
 option(BUILD_SHARED_LIBS "Build cuDF shared libraries" ON)
 option(JITIFY_USE_CACHE "Use a file cache for JIT compiled kernels" ON)
+option(CUDF_BUILD_TESTUTIL "Whether to build the test utilities contained in libcudf" ON)
+mark_as_advanced(CUDF_BUILD_TESTUTIL)
 option(CUDF_USE_PROPRIETARY_NVCOMP "Download and use NVCOMP with proprietary extensions" ON)
 option(CUDF_USE_ARROW_STATIC "Build and statically link Arrow libraries" OFF)
 option(CUDF_ENABLE_ARROW_ORC "Build the Arrow ORC adapter" OFF)
@@ -94,6 +96,12 @@ message(VERBOSE "CUDF: Statically link the CUDA runtime: ${CUDA_STATIC_RUNTIME}"
 rapids_cmake_build_type("Release")
 set(CUDF_BUILD_TESTS ${BUILD_TESTS})
 set(CUDF_BUILD_BENCHMARKS ${BUILD_BENCHMARKS})
+if(BUILD_TESTS AND NOT CUDF_BUILD_TESTUTIL)
+  message(
+    FATAL_ERROR
+      "Tests cannot be built without building cudf test utils. Please set CUDF_BUILD_TESTUTIL=ON or BUILD_TESTS=OFF"
+  )
+endif()
 
 set(CUDF_CXX_FLAGS "")
 set(CUDF_CUDA_FLAGS "")
@@ -133,12 +141,14 @@ include(cmake/Modules/ConfigureCUDA.cmake) # set other CUDA compilation flags
 # find zlib
 rapids_find_package(ZLIB REQUIRED)
 
-# find Threads (needed by cudftestutil)
-rapids_find_package(
-  Threads REQUIRED
-  BUILD_EXPORT_SET cudf-exports
-  INSTALL_EXPORT_SET cudf-exports
-)
+if(CUDF_BUILD_TESTUTIL)
+  # find Threads (needed by cudftestutil)
+  rapids_find_package(
+    Threads REQUIRED
+    BUILD_EXPORT_SET cudf-exports
+    INSTALL_EXPORT_SET cudf-exports
+  )
+endif()
 
 # add third party dependencies using CPM
 rapids_cpm_init()
@@ -160,7 +170,9 @@ rapids_cpm_libcudacxx(BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-expo
 # find cuCollections Should come after including thrust and libcudacxx
 include(cmake/thirdparty/get_cucollections.cmake)
 # find or install GoogleTest
-include(cmake/thirdparty/get_gtest.cmake)
+if(CUDF_BUILD_TESTUTIL)
+  include(cmake/thirdparty/get_gtest.cmake)
+endif()
 # preprocess jitify-able kernels
 include(cmake/Modules/JitifyPreprocessKernels.cmake)
 # find cuFile
@@ -694,46 +706,48 @@ add_library(cudf::cudf ALIAS cudf)
 # ##################################################################################################
 # * build cudftestutil ----------------------------------------------------------------------------
 
-add_library(
-  cudftestutil STATIC
-  tests/io/metadata_utilities.cpp
-  tests/utilities/base_fixture.cpp
-  tests/utilities/column_utilities.cu
-  tests/utilities/table_utilities.cu
-  tests/utilities/tdigest_utilities.cu
-)
+if(CUDF_BUILD_TESTUTIL)
+  add_library(
+    cudftestutil STATIC
+    tests/io/metadata_utilities.cpp
+    tests/utilities/base_fixture.cpp
+    tests/utilities/column_utilities.cu
+    tests/utilities/table_utilities.cu
+    tests/utilities/tdigest_utilities.cu
+  )
 
-set_target_properties(
-  cudftestutil
-  PROPERTIES BUILD_RPATH "\$ORIGIN"
-             INSTALL_RPATH "\$ORIGIN"
-             # set target compile options
-             CXX_STANDARD 17
-             CXX_STANDARD_REQUIRED ON
-             CUDA_STANDARD 17
-             CUDA_STANDARD_REQUIRED ON
-             POSITION_INDEPENDENT_CODE ON
-             INTERFACE_POSITION_INDEPENDENT_CODE ON
-)
+  set_target_properties(
+    cudftestutil
+    PROPERTIES BUILD_RPATH "\$ORIGIN"
+               INSTALL_RPATH "\$ORIGIN"
+               # set target compile options
+               CXX_STANDARD 17
+               CXX_STANDARD_REQUIRED ON
+               CUDA_STANDARD 17
+               CUDA_STANDARD_REQUIRED ON
+               POSITION_INDEPENDENT_CODE ON
+               INTERFACE_POSITION_INDEPENDENT_CODE ON
+  )
 
-target_compile_options(
-  cudftestutil PUBLIC "$<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:CXX>:${CUDF_CXX_FLAGS}>>"
-                      "$<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>>"
-)
+  target_compile_options(
+    cudftestutil PUBLIC "$<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:CXX>:${CUDF_CXX_FLAGS}>>"
+                        "$<BUILD_INTERFACE:$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>>"
+  )
 
-target_link_libraries(
-  cudftestutil
-  PUBLIC GTest::gmock GTest::gtest Threads::Threads cudf
-  PRIVATE $<TARGET_NAME_IF_EXISTS:conda_env>
-)
+  target_link_libraries(
+    cudftestutil
+    PUBLIC GTest::gmock GTest::gtest Threads::Threads cudf
+    PRIVATE $<TARGET_NAME_IF_EXISTS:conda_env>
+  )
 
-target_include_directories(
-  cudftestutil PUBLIC "$<BUILD_INTERFACE:${CUDF_SOURCE_DIR}>"
-                      "$<BUILD_INTERFACE:${CUDF_SOURCE_DIR}/src>"
-)
+  target_include_directories(
+    cudftestutil PUBLIC "$<BUILD_INTERFACE:${CUDF_SOURCE_DIR}>"
+                        "$<BUILD_INTERFACE:${CUDF_SOURCE_DIR}/src>"
+  )
 
-add_library(cudf::cudftestutil ALIAS cudftestutil)
+  add_library(cudf::cudftestutil ALIAS cudftestutil)
 
+endif()
 # ##################################################################################################
 # * add tests -------------------------------------------------------------------------------------
 
@@ -788,24 +802,26 @@ install(DIRECTORY ${CUDF_SOURCE_DIR}/include/cudf ${CUDF_SOURCE_DIR}/include/cud
                   ${CUDF_SOURCE_DIR}/include/nvtext DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
 )
 
-install(
-  TARGETS cudftestutil
-  DESTINATION ${lib_dir}
-  EXPORT cudf-testing-exports
-)
+if(CUDF_BUILD_TESTUTIL)
+  install(
+    TARGETS cudftestutil
+    DESTINATION ${lib_dir}
+    EXPORT cudf-testing-exports
+  )
 
-install(
-  EXPORT cudf-testing-exports
-  FILE cudf-testing-targets.cmake
-  NAMESPACE cudf::
-  DESTINATION "${lib_dir}/cmake/cudf"
-)
+  install(
+    EXPORT cudf-testing-exports
+    FILE cudf-testing-targets.cmake
+    NAMESPACE cudf::
+    DESTINATION "${lib_dir}/cmake/cudf"
+  )
 
-include("${rapids-cmake-dir}/export/write_dependencies.cmake")
-rapids_export_write_dependencies(
-  INSTALL cudf-testing-exports
-  "${PROJECT_BINARY_DIR}/rapids-cmake/cudf/export/cudf-testing-dependencies.cmake"
-)
+  include("${rapids-cmake-dir}/export/write_dependencies.cmake")
+  rapids_export_write_dependencies(
+    INSTALL cudf-testing-exports
+    "${PROJECT_BINARY_DIR}/rapids-cmake/cudf/export/cudf-testing-dependencies.cmake"
+  )
+endif()
 
 set(doc_string
     [=[
@@ -895,6 +911,7 @@ if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/cudf-testing-targets.cmake")
 endif()
 ]=]
 )
+
 string(APPEND build_code_string "${common_code_string}")
 
 rapids_export(
@@ -906,15 +923,16 @@ rapids_export(
   FINAL_CODE_BLOCK build_code_string
 )
 
-export(
-  EXPORT cudf-testing-exports
-  FILE ${CUDF_BINARY_DIR}/cudf-testing-targets.cmake
-  NAMESPACE cudf::
-)
-rapids_export_write_dependencies(
-  BUILD cudf-testing-exports "${CUDF_BINARY_DIR}/cudf-testing-dependencies.cmake"
-)
-
+if(CUDF_BUILD_TESTUTIL)
+  export(
+    EXPORT cudf-testing-exports
+    FILE ${CUDF_BINARY_DIR}/cudf-testing-targets.cmake
+    NAMESPACE cudf::
+  )
+  rapids_export_write_dependencies(
+    BUILD cudf-testing-exports "${CUDF_BINARY_DIR}/cudf-testing-dependencies.cmake"
+  )
+endif()
 # ##################################################################################################
 # * make documentation ----------------------------------------------------------------------------
 
diff --git a/python/cudf/CMakeLists.txt b/python/cudf/CMakeLists.txt
index 1c8bef42e4c..87ebcce1bc6 100644
--- a/python/cudf/CMakeLists.txt
+++ b/python/cudf/CMakeLists.txt
@@ -31,9 +31,15 @@ project(
 option(FIND_CUDF_CPP "Search for existing CUDF C++ installations before defaulting to local files"
        OFF
 )
+option(CUDF_BUILD_WHEELS "Whether this build is generating a Python wheel." OFF)
 option(USE_LIBARROW_FROM_PYARROW "Use the libarrow contained within pyarrow." OFF)
 mark_as_advanced(USE_LIBARROW_FROM_PYARROW)
 
+# Always build wheels against the pyarrow libarrow.
+if(CUDF_BUILD_WHEELS)
+  set(USE_LIBARROW_FROM_PYARROW ON)
+endif()
+
 # If the user requested it we attempt to find CUDF.
 if(FIND_CUDF_CPP)
   if(USE_LIBARROW_FROM_PYARROW)
@@ -75,8 +81,32 @@ if(NOT cudf_FOUND)
 
   set(BUILD_TESTS OFF)
   set(BUILD_BENCHMARKS OFF)
-  add_subdirectory(../../cpp cudf-cpp)
 
+  set(_exclude_from_all "")
+  if(CUDF_BUILD_WHEELS)
+    # We don't build C++ tests when building wheels, so we can also omit the test util and shrink
+    # the wheel by avoiding embedding GTest.
+    set(CUDF_BUILD_TESTUTIL OFF)
+
+    # Statically link cudart if building wheels
+    set(CUDA_STATIC_RUNTIME ON)
+
+    # Need to set this so all the nvcomp targets are global, not only nvcomp::nvcomp
+    # https://cmake.org/cmake/help/latest/variable/CMAKE_FIND_PACKAGE_TARGETS_GLOBAL.html#variable:CMAKE_FIND_PACKAGE_TARGETS_GLOBAL
+    set(CMAKE_FIND_PACKAGE_TARGETS_GLOBAL ON)
+
+    # Don't install the cuDF C++ targets into wheels
+    set(_exclude_from_all EXCLUDE_FROM_ALL)
+  endif()
+
+  add_subdirectory(../../cpp cudf-cpp ${_exclude_from_all})
+
+  if(CUDF_BUILD_WHEELS)
+    include(cmake/Modules/WheelHelpers.cmake)
+    get_target_property(_nvcomp_link_libs nvcomp::nvcomp INTERFACE_LINK_LIBRARIES)
+    # Ensure all the shared objects we need at runtime are in the wheel
+    add_target_libs_to_wheel(LIB_DIR cudf TARGETS arrow_shared nvcomp::nvcomp ${_nvcomp_link_libs})
+  endif()
   # Since there are multiple subpackages of cudf._lib that require access to libcudf, we place the
   # library in the cudf directory as a single source of truth and modify the other rpaths
   # appropriately.
diff --git a/python/cudf/LICENSE b/python/cudf/LICENSE
new file mode 120000
index 00000000000..30cff7403da
--- /dev/null
+++ b/python/cudf/LICENSE
@@ -0,0 +1 @@
+../../LICENSE
\ No newline at end of file
diff --git a/python/cudf/_custom_build/backend.py b/python/cudf/_custom_build/backend.py
new file mode 100644
index 00000000000..37b7edf2432
--- /dev/null
+++ b/python/cudf/_custom_build/backend.py
@@ -0,0 +1,37 @@
+# Copyright (c) 2022, NVIDIA CORPORATION.
+
+"""Custom build backend for cudf to get versioned requirements.
+
+Based on https://setuptools.pypa.io/en/latest/build_meta.html
+"""
+import os
+from functools import wraps
+
+from setuptools import build_meta as _orig
+
+# Alias the required bits
+build_wheel = _orig.build_wheel
+build_sdist = _orig.build_sdist
+
+
+def replace_requirements(func):
+    @wraps(func)
+    def wrapper(config_settings=None):
+        orig_list = getattr(_orig, func.__name__)(config_settings)
+        append_list = [
+            f"rmm{os.getenv('RAPIDS_PY_WHEEL_CUDA_SUFFIX', default='')}"
+        ]
+        return orig_list + append_list
+
+    return wrapper
+
+
+get_requires_for_build_wheel = replace_requirements(
+    _orig.get_requires_for_build_wheel
+)
+get_requires_for_build_sdist = replace_requirements(
+    _orig.get_requires_for_build_sdist
+)
+get_requires_for_build_editable = replace_requirements(
+    _orig.get_requires_for_build_editable
+)
diff --git a/python/cudf/cmake/Modules/WheelHelpers.cmake b/python/cudf/cmake/Modules/WheelHelpers.cmake
new file mode 100644
index 00000000000..28ea33240fa
--- /dev/null
+++ b/python/cudf/cmake/Modules/WheelHelpers.cmake
@@ -0,0 +1,71 @@
+# =============================================================================
+# Copyright (c) 2022, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+# =============================================================================
+include_guard(GLOBAL)
+
+# Making libraries available inside wheels by installing the associated targets.
+function(add_target_libs_to_wheel)
+  list(APPEND CMAKE_MESSAGE_CONTEXT "add_target_libs_to_wheel")
+
+  set(options "")
+  set(one_value "LIB_DIR")
+  set(multi_value "TARGETS")
+  cmake_parse_arguments(_ "${options}" "${one_value}" "${multi_value}" ${ARGN})
+
+  message(VERBOSE "Installing targets '${__TARGETS}' into lib_dir '${__LIB_DIR}'")
+
+  foreach(target IN LISTS __TARGETS)
+
+    if(NOT TARGET ${target})
+      message(VERBOSE "No target named ${target}")
+      continue()
+    endif()
+
+    get_target_property(alias_target ${target} ALIASED_TARGET)
+    if(alias_target)
+      set(target ${alias_target})
+    endif()
+
+    get_target_property(is_imported ${target} IMPORTED)
+    if(NOT is_imported)
+      # If the target isn't imported, install it into the wheel
+      install(TARGETS ${target} DESTINATION ${__LIB_DIR})
+      message(VERBOSE "install(TARGETS ${target} DESTINATION ${__LIB_DIR})")
+    else()
+      # If the target is imported, make sure it's global
+      get_target_property(already_global ${target} IMPORTED_GLOBAL)
+      if(NOT already_global)
+        set_target_properties(${target} PROPERTIES IMPORTED_GLOBAL TRUE)
+      endif()
+
+      # Find the imported target's library so we can copy it into the wheel
+      set(lib_loc)
+      foreach(prop IN ITEMS IMPORTED_LOCATION IMPORTED_LOCATION_RELEASE IMPORTED_LOCATION_DEBUG)
+        get_target_property(lib_loc ${target} ${prop})
+        if(lib_loc)
+          message(VERBOSE "Found ${prop} for ${target}: ${lib_loc}")
+          break()
+        endif()
+        message(VERBOSE "${target} has no value for property ${prop}")
+      endforeach()
+
+      if(NOT lib_loc)
+        message(FATAL_ERROR "Found no libs to install for target ${target}")
+      endif()
+
+      # Copy the imported library into the wheel
+      install(FILES ${lib_loc} DESTINATION ${__LIB_DIR})
+      message(VERBOSE "install(FILES ${lib_loc} DESTINATION ${__LIB_DIR})")
+    endif()
+  endforeach()
+endfunction()
diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt
index d58bdee02ad..0b72298b51e 100644
--- a/python/cudf/cudf/_lib/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/CMakeLists.txt
@@ -54,6 +54,7 @@ set(cython_sources
     utils.pyx
 )
 set(linked_libraries cudf::cudf)
+
 rapids_cython_create_modules(
   CXX
   SOURCE_FILES "${cython_sources}"
@@ -63,10 +64,24 @@ rapids_cython_create_modules(
 # TODO: Finding NumPy currently requires finding Development due to a bug in CMake. This bug was
 # fixed in https://gitlab.kitware.com/cmake/cmake/-/merge_requests/7410 and will be available in
 # CMake 3.24, so we can remove the Development component once we upgrade to CMake 3.24.
-find_package(Python REQUIRED COMPONENTS Development NumPy)
+# find_package(Python REQUIRED COMPONENTS Development NumPy)
+
+# Note: The bug noted above prevents us from finding NumPy successfully using FindPython.cmake
+# inside the manylinux images used to build wheels because manylinux images do not contain
+# libpython.so and therefore Development cannot be found. Until we upgrade to CMake 3.24, we should
+# use FindNumPy.cmake instead (provided by scikit-build). When we switch to 3.24 we can try
+# switching back, but it may not work if that implicitly still requires Python libraries. In that
+# case we'll need to follow up with the CMake team to remove that dependency.  The stopgap solution
+# is to unpack the static lib tarballs in the wheel building jobs so that there are at least static
+# libs to be found, but that should be a last resort since it implies a dependency that isn't really
+# necessary. The relevant command is tar -xf /opt/_internal/static-libs-for-embedding-only.tar.xz -C
+# /opt/_internal
+find_package(NumPy REQUIRED)
 set(targets_using_numpy interop avro csv orc json parquet)
 foreach(target IN LISTS targets_using_numpy)
-  target_include_directories(${target} PRIVATE "${Python_NumPy_INCLUDE_DIRS}")
+  target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}")
+  # Switch to the line below when we switch back to FindPython.cmake in CMake 3.24.
+  # target_include_directories(${target} PRIVATE "${Python_NumPy_INCLUDE_DIRS}")
 endforeach()
 
 set(targets_using_dlpack interop)
diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml
index 52490444dba..92b86649564 100644
--- a/python/cudf/pyproject.toml
+++ b/python/cudf/pyproject.toml
@@ -9,4 +9,10 @@ requires = [
     "scikit-build>=0.13.1",
     "cmake>=3.23.1",
     "ninja",
+    "numpy",
+    "pyarrow==9.0.0",
+    "protoc-wheel",
+    "versioneer",
 ]
+build-backend = "backend"
+backend-path = ["_custom_build"]
diff --git a/python/cudf/setup.py b/python/cudf/setup.py
index 9f22f87e240..2d5defc2849 100644
--- a/python/cudf/setup.py
+++ b/python/cudf/setup.py
@@ -1,16 +1,16 @@
 # Copyright (c) 2018-2022, NVIDIA CORPORATION.
 
 import os
-import re
-import shutil
 
 import versioneer
 from setuptools import find_packages
 from skbuild import setup
 
+cuda_suffix = os.getenv("RAPIDS_PY_WHEEL_CUDA_SUFFIX", default="")
+
 install_requires = [
     "cachetools",
-    "cuda-python>=11.5,<11.7.1",
+    "cuda-python>=11.7.1,<12.0",
     "fsspec>=0.6.0",
     "numba>=0.56.2",
     "numpy",
@@ -19,6 +19,11 @@
     "pandas>=1.0,<1.6.0dev0",
     "protobuf>=3.20.1,<3.21.0a0",
     "typing_extensions",
+    "pyarrow==9.0.0",
+    f"rmm{cuda_suffix}",
+    f"ptxcompiler{cuda_suffix}",
+    f"cubinlinker{cuda_suffix}",
+    "cupy-cuda11x",
 ]
 
 extras_require = {
@@ -33,55 +38,24 @@
         "pyorc",
         "msgpack",
         "transformers<=4.10.3",
+        "tzdata",
     ]
 }
 
+if "RAPIDS_PY_WHEEL_VERSIONEER_OVERRIDE" in os.environ:
+    orig_get_versions = versioneer.get_versions
 
-def get_cuda_version_from_header(cuda_include_dir, delimeter=""):
-
-    cuda_version = None
-
-    with open(os.path.join(cuda_include_dir, "cuda.h"), encoding="utf-8") as f:
-        for line in f.readlines():
-            if re.search(r"#define CUDA_VERSION ", line) is not None:
-                cuda_version = line
-                break
-
-    if cuda_version is None:
-        raise TypeError("CUDA_VERSION not found in cuda.h")
-    cuda_version = int(cuda_version.split()[2])
-    return "%d%s%d" % (
-        cuda_version // 1000,
-        delimeter,
-        (cuda_version % 1000) // 10,
-    )
-
+    version_override = os.environ["RAPIDS_PY_WHEEL_VERSIONEER_OVERRIDE"]
 
-CUDA_HOME = os.environ.get("CUDA_HOME", False)
-if not CUDA_HOME:
-    path_to_cuda_gdb = shutil.which("cuda-gdb")
-    if path_to_cuda_gdb is None:
-        raise OSError(
-            "Could not locate CUDA. "
-            "Please set the environment variable "
-            "CUDA_HOME to the path to the CUDA installation "
-            "and try again."
-        )
-    CUDA_HOME = os.path.dirname(os.path.dirname(path_to_cuda_gdb))
-
-if not os.path.isdir(CUDA_HOME):
-    raise OSError(f"Invalid CUDA_HOME: directory does not exist: {CUDA_HOME}")
-
-cuda_include_dir = os.path.join(CUDA_HOME, "include")
-install_requires.append(
-    "cupy-cuda"
-    + get_cuda_version_from_header(cuda_include_dir)
-    + ">=9.5.0,<12.0.0a0"
-)
+    def get_versions():
+        data = orig_get_versions()
+        data["version"] = version_override
+        return data
 
+    versioneer.get_versions = get_versions
 
 setup(
-    name="cudf",
+    name=f"cudf{cuda_suffix}",
     version=versioneer.get_version(),
     description="cuDF - GPU Dataframe",
     url="https://github.com/rapidsai/cudf",
@@ -96,11 +70,12 @@ def get_cuda_version_from_header(cuda_include_dir, delimeter=""):
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
     ],
+    cmdclass=versioneer.get_cmdclass(),
+    include_package_data=True,
     packages=find_packages(include=["cudf", "cudf.*"]),
     package_data={
         key: ["*.pxd"] for key in find_packages(include=["cudf._lib*"])
     },
-    cmdclass=versioneer.get_cmdclass(),
     install_requires=install_requires,
     extras_require=extras_require,
     zip_safe=False,
diff --git a/python/dask_cudf/LICENSE b/python/dask_cudf/LICENSE
new file mode 120000
index 00000000000..30cff7403da
--- /dev/null
+++ b/python/dask_cudf/LICENSE
@@ -0,0 +1 @@
+../../LICENSE
\ No newline at end of file
diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py
index 4fa2af89b9d..d9d4da9c4ab 100644
--- a/python/dask_cudf/setup.py
+++ b/python/dask_cudf/setup.py
@@ -1,19 +1,20 @@
 # Copyright (c) 2019-2022, NVIDIA CORPORATION.
 
 import os
-import re
-import shutil
 
 import versioneer
 from setuptools import find_packages, setup
 
+cuda_suffix = os.getenv("RAPIDS_PY_WHEEL_CUDA_SUFFIX", default="")
+
 install_requires = [
-    "cudf",
-    "dask>=2022.9.2",
-    "distributed>=2022.9.2",
+    "dask==2022.9.2",
+    "distributed==2022.9.2",
     "fsspec>=0.6.0",
     "numpy",
     "pandas>=1.0,<1.6.0dev0",
+    f"cudf{cuda_suffix}",
+    "cupy-cuda11x",
 ]
 
 extras_require = {
@@ -21,58 +22,25 @@
         "numpy",
         "pandas>=1.0,<1.6.0dev0",
         "pytest",
+        "pytest-xdist",
         "numba>=0.56.2",
-        "dask>=2021.09.1",
-        "distributed>=2021.09.1",
     ]
 }
 
+if "RAPIDS_PY_WHEEL_VERSIONEER_OVERRIDE" in os.environ:
+    orig_get_versions = versioneer.get_versions
 
-def get_cuda_version_from_header(cuda_include_dir, delimeter=""):
-
-    cuda_version = None
-
-    with open(os.path.join(cuda_include_dir, "cuda.h"), encoding="utf-8") as f:
-        for line in f.readlines():
-            if re.search(r"#define CUDA_VERSION ", line) is not None:
-                cuda_version = line
-                break
-
-    if cuda_version is None:
-        raise TypeError("CUDA_VERSION not found in cuda.h")
-    cuda_version = int(cuda_version.split()[2])
-    return "%d%s%d" % (
-        cuda_version // 1000,
-        delimeter,
-        (cuda_version % 1000) // 10,
-    )
+    version_override = os.environ["RAPIDS_PY_WHEEL_VERSIONEER_OVERRIDE"]
 
+    def get_versions():
+        data = orig_get_versions()
+        data["version"] = version_override
+        return data
 
-CUDA_HOME = os.environ.get("CUDA_HOME", False)
-if not CUDA_HOME:
-    path_to_cuda_gdb = shutil.which("cuda-gdb")
-    if path_to_cuda_gdb is None:
-        raise OSError(
-            "Could not locate CUDA. "
-            "Please set the environment variable "
-            "CUDA_HOME to the path to the CUDA installation "
-            "and try again."
-        )
-    CUDA_HOME = os.path.dirname(os.path.dirname(path_to_cuda_gdb))
-
-if not os.path.isdir(CUDA_HOME):
-    raise OSError(f"Invalid CUDA_HOME: directory does not exist: {CUDA_HOME}")
-
-cuda_include_dir = os.path.join(CUDA_HOME, "include")
-install_requires.append(
-    "cupy-cuda"
-    + get_cuda_version_from_header(cuda_include_dir)
-    + ">=9.5.0,<12.0.0a0"
-)
-
+    versioneer.get_versions = get_versions
 
 setup(
-    name="dask-cudf",
+    name=f"dask-cudf{cuda_suffix}",
     version=versioneer.get_version(),
     description="Utilities for Dask and cuDF interactions",
     url="https://github.com/rapidsai/cudf",

From cc4b4dd27c4638a800cac9ef43f2c9a736b8e7a3 Mon Sep 17 00:00:00 2001
From: Robert Maynard <rmaynard@nvidia.com>
Date: Fri, 18 Nov 2022 00:14:59 -0500
Subject: [PATCH 190/202] Don't use CMake 3.25.0 as it has a show stopping
 FindCUDAToolkit bug (#12188)

Don't use CMake 3.25.0, as it has a show-stopping FindCUDAToolkit bug.

Authors:
  - Robert Maynard (https://github.com/robertmaynard)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - AJ Schmidt (https://github.com/ajschmidt8)

URL: https://github.com/rapidsai/cudf/pull/12188
---
 conda/environments/all_cuda-115_arch-x86_64.yaml  | 2 +-
 conda/recipes/cudf/conda_build_config.yaml        | 2 +-
 conda/recipes/cudf_kafka/meta.yaml                | 2 +-
 conda/recipes/libcudf/conda_build_config.yaml     | 2 +-
 conda/recipes/strings_udf/conda_build_config.yaml | 2 +-
 dependencies.yaml                                 | 2 +-
 python/cudf/pyproject.toml                        | 2 +-
 7 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/conda/environments/all_cuda-115_arch-x86_64.yaml b/conda/environments/all_cuda-115_arch-x86_64.yaml
index a7e5f1a04a6..ee235049dd7 100644
--- a/conda/environments/all_cuda-115_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-115_arch-x86_64.yaml
@@ -13,7 +13,7 @@ dependencies:
 - botocore>=1.24.21
 - c-compiler
 - cachetools
-- cmake>=3.23.1
+- cmake>=3.23.1,!=3.25.0
 - cubinlinker
 - cuda-python>=11.7.1,<12.0
 - cudatoolkit=11.5
diff --git a/conda/recipes/cudf/conda_build_config.yaml b/conda/recipes/cudf/conda_build_config.yaml
index 0027a80f1ec..4feac647e8c 100644
--- a/conda/recipes/cudf/conda_build_config.yaml
+++ b/conda/recipes/cudf/conda_build_config.yaml
@@ -8,7 +8,7 @@ sysroot_version:
   - "2.17"
 
 cmake_version:
-  - ">=3.23.1"
+  - ">=3.23.1,!=3.25.0"
 
 cuda_compiler:
   - nvcc
diff --git a/conda/recipes/cudf_kafka/meta.yaml b/conda/recipes/cudf_kafka/meta.yaml
index a65373efec3..4f7a4bbc268 100644
--- a/conda/recipes/cudf_kafka/meta.yaml
+++ b/conda/recipes/cudf_kafka/meta.yaml
@@ -22,7 +22,7 @@ build:
 
 requirements:
   build:
-    - cmake >=3.23.1
+    - cmake >=3.23.1,!=3.25.0
     - {{ compiler('c') }}
     - {{ compiler('cxx') }}
     - sysroot_{{ target_platform }} {{ sysroot_version }}
diff --git a/conda/recipes/libcudf/conda_build_config.yaml b/conda/recipes/libcudf/conda_build_config.yaml
index 7f5bf219f1f..5179cb55d84 100644
--- a/conda/recipes/libcudf/conda_build_config.yaml
+++ b/conda/recipes/libcudf/conda_build_config.yaml
@@ -11,7 +11,7 @@ sysroot_version:
   - "2.17"
 
 cmake_version:
-  - ">=3.23.1"
+  - ">=3.23.1,!=3.25.0"
 
 gtest_version:
   - "=1.10.0"
diff --git a/conda/recipes/strings_udf/conda_build_config.yaml b/conda/recipes/strings_udf/conda_build_config.yaml
index 0027a80f1ec..4feac647e8c 100644
--- a/conda/recipes/strings_udf/conda_build_config.yaml
+++ b/conda/recipes/strings_udf/conda_build_config.yaml
@@ -8,7 +8,7 @@ sysroot_version:
   - "2.17"
 
 cmake_version:
-  - ">=3.23.1"
+  - ">=3.23.1,!=3.25.0"
 
 cuda_compiler:
   - nvcc
diff --git a/dependencies.yaml b/dependencies.yaml
index b8470f02f86..43fef852c65 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -40,7 +40,7 @@ dependencies:
     common:
       - output_types: [conda, requirements]
         packages:
-          - cmake>=3.23.1
+          - cmake>=3.23.1,!=3.25.0
           - cuda-python>=11.7.1,<12.0
           - cython>=0.29,<0.30
           - dlpack>=0.5,<0.6.0a0
diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml
index 92b86649564..c3d96e2ea3f 100644
--- a/python/cudf/pyproject.toml
+++ b/python/cudf/pyproject.toml
@@ -7,7 +7,7 @@ requires = [
     "setuptools",
     "cython>=0.29,<0.30",
     "scikit-build>=0.13.1",
-    "cmake>=3.23.1",
+    "cmake>=3.23.1,!=3.25.0",
     "ninja",
     "numpy",
     "pyarrow==9.0.0",

From a2f69e4a1605f2fe20601e47831c828a519d1b73 Mon Sep 17 00:00:00 2001
From: "Mads R. B. Kristensen" <madsbk@gmail.com>
Date: Fri, 18 Nov 2022 18:05:38 +0100
Subject: [PATCH 191/202] Reduce number of tests marked `spilling`  (#12197)

To save CI running time, this PR drastically reduces the number of tests marked `spilling`.

An alternative to https://github.com/rapidsai/cudf/pull/12187

Authors:
   - Mads R. B. Kristensen (https://github.com/madsbk)

Approvers:
   - https://github.com/brandon-b-miller
   - GALI PREM SAGAR (https://github.com/galipremsagar)
---
 python/cudf/cudf/tests/test_binops.py    | 253 ++++++++++++-----------
 python/cudf/cudf/tests/test_dataframe.py | 114 ++++++----
 python/cudf/cudf/tests/test_reshape.py   |  27 ++-
 3 files changed, 231 insertions(+), 163 deletions(-)

diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py
index 6b720c3ad5c..5dfb962a4bb 100644
--- a/python/cudf/cudf/tests/test_binops.py
+++ b/python/cudf/cudf/tests/test_binops.py
@@ -14,6 +14,7 @@
 import cudf
 from cudf import Series
 from cudf.core._compat import PANDAS_GE_150
+from cudf.core.buffer.spill_manager import get_global_manager
 from cudf.core.index import as_index
 from cudf.testing import _utils as utils
 from cudf.utils.dtypes import (
@@ -27,7 +28,6 @@
 
 STRING_TYPES = {"str"}
 
-
 _binops = [
     operator.add,
     operator.sub,
@@ -47,8 +47,131 @@
     operator.ge,
 ]
 
+_bitwise_binops = [operator.and_, operator.or_, operator.xor]
+
+_int_types = [
+    "int8",
+    "int16",
+    "int32",
+    "int64",
+    "uint8",
+    "uint16",
+    "uint32",
+]
+
+_cmpops = [
+    operator.lt,
+    operator.gt,
+    operator.le,
+    operator.ge,
+    operator.eq,
+    operator.ne,
+]
+
+_reflected_ops = [
+    lambda x: 1 + x,
+    lambda x: 2 * x,
+    lambda x: 2 - x,
+    lambda x: 2 // x,
+    lambda x: 2 / x,
+    lambda x: 3 + x,
+    lambda x: 3 * x,
+    lambda x: 3 - x,
+    lambda x: 3 // x,
+    lambda x: 3 / x,
+    lambda x: 3 % x,
+    lambda x: -1 + x,
+    lambda x: -2 * x,
+    lambda x: -2 - x,
+    lambda x: -2 // x,
+    lambda x: -2 / x,
+    lambda x: -3 + x,
+    lambda x: -3 * x,
+    lambda x: -3 - x,
+    lambda x: -3 // x,
+    lambda x: -3 / x,
+    lambda x: -3 % x,
+    lambda x: 0 + x,
+    lambda x: 0 * x,
+    lambda x: 0 - x,
+    lambda x: 0 // x,
+    lambda x: 0 / x,
+]
+
+_operators_arithmetic = [
+    "add",
+    "radd",
+    "sub",
+    "rsub",
+    "mul",
+    "rmul",
+    "mod",
+    "rmod",
+    "pow",
+    "rpow",
+    "div",
+    "divide",
+    "floordiv",
+    "rfloordiv",
+    "truediv",
+    "rtruediv",
+]
+
+_operators_comparison = ["eq", "ne", "lt", "le", "gt", "ge"]
+
+
+_cudf_scalar_reflected_ops = [
+    lambda x: cudf.Scalar(1) + x,
+    lambda x: cudf.Scalar(2) * x,
+    lambda x: cudf.Scalar(2) - x,
+    lambda x: cudf.Scalar(2) // x,
+    lambda x: cudf.Scalar(2) / x,
+    lambda x: cudf.Scalar(3) + x,
+    lambda x: cudf.Scalar(3) * x,
+    lambda x: cudf.Scalar(3) - x,
+    lambda x: cudf.Scalar(3) // x,
+    lambda x: cudf.Scalar(3) / x,
+    lambda x: cudf.Scalar(3) % x,
+    lambda x: cudf.Scalar(-1) + x,
+    lambda x: cudf.Scalar(-2) * x,
+    lambda x: cudf.Scalar(-2) - x,
+    lambda x: cudf.Scalar(-2) // x,
+    lambda x: cudf.Scalar(-2) / x,
+    lambda x: cudf.Scalar(-3) + x,
+    lambda x: cudf.Scalar(-3) * x,
+    lambda x: cudf.Scalar(-3) - x,
+    lambda x: cudf.Scalar(-3) // x,
+    lambda x: cudf.Scalar(-3) / x,
+    lambda x: cudf.Scalar(-3) % x,
+    lambda x: cudf.Scalar(0) + x,
+    lambda x: cudf.Scalar(0) * x,
+    lambda x: cudf.Scalar(0) - x,
+    lambda x: cudf.Scalar(0) // x,
+    lambda x: cudf.Scalar(0) / x,
+]
+
+pytest_xfail = pytest.mark.xfail
 pytestmark = pytest.mark.spilling
 
+# If spilling is enabled globally, we skip many test permutations
+# to reduce running time.
+if get_global_manager() is not None:
+    _binops = _binops[:1]
+    _binops_compare = _binops_compare[:1]
+    _int_types = _int_types[-1:]
+    _cmpops = _cmpops[:1]
+    _reflected_ops = _reflected_ops[:1]
+    _operators_arithmetic = _operators_arithmetic[:1]
+    _operators_comparison = _operators_comparison[:1]
+    _cudf_scalar_reflected_ops = _cudf_scalar_reflected_ops[:1]
+    DATETIME_TYPES = {"datetime64[ms]"}  # noqa: F811
+    NUMERIC_TYPES = {"float32"}  # noqa: F811
+    FLOAT_TYPES = {"float64"}  # noqa: F811
+    INTEGER_TYPES = {"int16"}  # noqa: F811
+    TIMEDELTA_TYPES = {"timedelta64[s]"}  # noqa: F811
+    # To save time, we skip tests marked "pytest.mark.xfail"
+    pytest_xfail = pytest.mark.skipif
+
 
 @pytest.mark.parametrize("obj_class", ["Series", "Index"])
 @pytest.mark.parametrize("binop", _binops)
@@ -114,20 +237,6 @@ def test_series_binop_scalar(nelem, binop, obj_class, use_cudf_scalar):
     np.testing.assert_almost_equal(result.to_numpy(), binop(arr, rhs))
 
 
-_bitwise_binops = [operator.and_, operator.or_, operator.xor]
-
-
-_int_types = [
-    "int8",
-    "int16",
-    "int32",
-    "int64",
-    "uint8",
-    "uint16",
-    "uint32",
-]
-
-
 @pytest.mark.parametrize("obj_class", ["Series", "Index"])
 @pytest.mark.parametrize("binop", _bitwise_binops)
 @pytest.mark.parametrize(
@@ -152,16 +261,6 @@ def test_series_bitwise_binop(binop, obj_class, lhs_dtype, rhs_dtype):
     np.testing.assert_almost_equal(result.to_numpy(), binop(arr1, arr2))
 
 
-_cmpops = [
-    operator.lt,
-    operator.gt,
-    operator.le,
-    operator.ge,
-    operator.eq,
-    operator.ne,
-]
-
-
 @pytest.mark.parametrize("obj_class", ["Series", "Index"])
 @pytest.mark.parametrize("cmpop", _cmpops)
 @pytest.mark.parametrize(
@@ -385,37 +484,6 @@ def test_series_cmpop_mixed_dtype(cmpop, lhs_dtype, rhs_dtype, obj_class):
     np.testing.assert_array_equal(result.to_numpy(), cmpop(lhs, rhs))
 
 
-_reflected_ops = [
-    lambda x: 1 + x,
-    lambda x: 2 * x,
-    lambda x: 2 - x,
-    lambda x: 2 // x,
-    lambda x: 2 / x,
-    lambda x: 3 + x,
-    lambda x: 3 * x,
-    lambda x: 3 - x,
-    lambda x: 3 // x,
-    lambda x: 3 / x,
-    lambda x: 3 % x,
-    lambda x: -1 + x,
-    lambda x: -2 * x,
-    lambda x: -2 - x,
-    lambda x: -2 // x,
-    lambda x: -2 / x,
-    lambda x: -3 + x,
-    lambda x: -3 * x,
-    lambda x: -3 - x,
-    lambda x: -3 // x,
-    lambda x: -3 / x,
-    lambda x: -3 % x,
-    lambda x: 0 + x,
-    lambda x: 0 * x,
-    lambda x: 0 - x,
-    lambda x: 0 // x,
-    lambda x: 0 / x,
-]
-
-
 @pytest.mark.parametrize("obj_class", ["Series", "Index"])
 @pytest.mark.parametrize(
     "func, dtype", list(product(_reflected_ops, utils.NUMERIC_TYPES))
@@ -458,37 +526,6 @@ def test_cudf_scalar_reflected_ops_scalar(func, dtype):
     assert np.isclose(expected, actual)
 
 
-_cudf_scalar_reflected_ops = [
-    lambda x: cudf.Scalar(1) + x,
-    lambda x: cudf.Scalar(2) * x,
-    lambda x: cudf.Scalar(2) - x,
-    lambda x: cudf.Scalar(2) // x,
-    lambda x: cudf.Scalar(2) / x,
-    lambda x: cudf.Scalar(3) + x,
-    lambda x: cudf.Scalar(3) * x,
-    lambda x: cudf.Scalar(3) - x,
-    lambda x: cudf.Scalar(3) // x,
-    lambda x: cudf.Scalar(3) / x,
-    lambda x: cudf.Scalar(3) % x,
-    lambda x: cudf.Scalar(-1) + x,
-    lambda x: cudf.Scalar(-2) * x,
-    lambda x: cudf.Scalar(-2) - x,
-    lambda x: cudf.Scalar(-2) // x,
-    lambda x: cudf.Scalar(-2) / x,
-    lambda x: cudf.Scalar(-3) + x,
-    lambda x: cudf.Scalar(-3) * x,
-    lambda x: cudf.Scalar(-3) - x,
-    lambda x: cudf.Scalar(-3) // x,
-    lambda x: cudf.Scalar(-3) / x,
-    lambda x: cudf.Scalar(-3) % x,
-    lambda x: cudf.Scalar(0) + x,
-    lambda x: cudf.Scalar(0) * x,
-    lambda x: cudf.Scalar(0) - x,
-    lambda x: cudf.Scalar(0) // x,
-    lambda x: cudf.Scalar(0) / x,
-]
-
-
 @pytest.mark.parametrize("obj_class", ["Series", "Index"])
 @pytest.mark.parametrize(
     "funcs, dtype",
@@ -652,28 +689,6 @@ def test_boolean_scalar_binop(op):
     utils.assert_eq(op(psr, False), op(gsr, cudf.Scalar(False)))
 
 
-_operators_arithmetic = [
-    "add",
-    "radd",
-    "sub",
-    "rsub",
-    "mul",
-    "rmul",
-    "mod",
-    "rmod",
-    "pow",
-    "rpow",
-    "div",
-    "divide",
-    "floordiv",
-    "rfloordiv",
-    "truediv",
-    "rtruediv",
-]
-
-_operators_comparison = ["eq", "ne", "lt", "le", "gt", "ge"]
-
-
 @pytest.mark.parametrize("func", _operators_arithmetic)
 @pytest.mark.parametrize("has_nulls", [True, False])
 @pytest.mark.parametrize("fill_value", [None, 27])
@@ -887,7 +902,7 @@ def test_binop_bool_uint(func, rhs):
     (
         pytest.param(
             np.bool_,
-            marks=pytest.mark.xfail(
+            marks=pytest_xfail(
                 reason=(
                     "Pandas handling of division by zero-bool is too strange"
                 )
@@ -918,7 +933,7 @@ def test_floordiv_zero_float64(series_dtype, divisor_dtype, scalar_divisor):
     (
         pytest.param(
             np.bool_,
-            marks=pytest.mark.xfail(
+            marks=pytest_xfail(
                 reason=(
                     "Pandas handling of division by zero-bool is too strange"
                 )
@@ -1624,7 +1639,7 @@ def test_scalar_null_binops(op, dtype_l, dtype_r):
         "microseconds",
         pytest.param(
             "nanoseconds",
-            marks=pytest.mark.xfail(
+            marks=pytest_xfail(
                 condition=not PANDAS_GE_150,
                 reason="https://github.com/pandas-dev/pandas/issues/36589",
             ),
@@ -1676,19 +1691,19 @@ def test_datetime_dateoffset_binaryop(
         {"months": 2, "years": 5, "seconds": 923, "microseconds": 481},
         pytest.param(
             {"milliseconds": 4},
-            marks=pytest.mark.xfail(
+            marks=pytest_xfail(
                 reason="Pandas gets the wrong answer for milliseconds"
             ),
         ),
         pytest.param(
             {"milliseconds": 4, "years": 2},
-            marks=pytest.mark.xfail(
+            marks=pytest_xfail(
                 reason="Pandas construction fails with these keywords"
             ),
         ),
         pytest.param(
             {"nanoseconds": 12},
-            marks=pytest.mark.xfail(
+            marks=pytest_xfail(
                 reason="Pandas gets the wrong answer for nanoseconds"
             ),
         ),
@@ -1732,7 +1747,7 @@ def test_datetime_dateoffset_binaryop_multiple(date_col, kwargs, op):
         "microseconds",
         pytest.param(
             "nanoseconds",
-            marks=pytest.mark.xfail(
+            marks=pytest_xfail(
                 condition=not PANDAS_GE_150,
                 reason="https://github.com/pandas-dev/pandas/issues/36589",
             ),
@@ -2754,7 +2769,7 @@ def test_binops_decimal_comp_mixed_integer(args, integer_dtype, reflected):
         ),
     ],
 )
-@pytest.mark.xfail(
+@pytest_xfail(
     reason="binop operations not supported for different "
     "bit-width decimal types"
 )
@@ -2928,7 +2943,7 @@ def decimal_series(input, dtype):
     ],
 )
 @pytest.mark.parametrize("reflected", [True, False])
-@pytest.mark.xfail(
+@pytest_xfail(
     reason="binop operations not supported for different bit-width "
     "decimal types"
 )
@@ -3117,7 +3132,7 @@ def test_empty_column(binop, data, scalar):
         cudf.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]]),
         pytest.param(
             cudf.DataFrame([[1, None, None, 4], [5, 6, 7, None]]),
-            marks=pytest.mark.xfail(
+            marks=pytest_xfail(
                 reason="Cannot access Frame.values if frame contains nulls"
             ),
         ),
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 105f86df22e..f6716ece95b 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -28,6 +28,7 @@
     PANDAS_GE_134,
     PANDAS_LT_140,
 )
+from cudf.core.buffer.spill_manager import get_global_manager
 from cudf.core.column import column
 from cudf.testing import _utils as utils
 from cudf.testing._utils import (
@@ -40,8 +41,23 @@
     gen_rand,
 )
 
+pytest_xfail = pytest.mark.xfail
 pytestmark = pytest.mark.spilling
 
+# Use this to "unmark" the module level spilling mark
+pytest_unmark_spilling = pytest.mark.skipif(
+    get_global_manager() is not None, reason="unmarked spilling"
+)
+
+# If spilling is enabled globally, we skip many test permutations
+# to reduce running time.
+if get_global_manager() is not None:
+    ALL_TYPES = ["float32"]  # noqa: F811
+    DATETIME_TYPES = ["datetime64[ms]"]  # noqa: F811
+    NUMERIC_TYPES = ["float32"]  # noqa: F811
+    # To save time, we skip tests marked "xfail"
+    pytest_xfail = pytest.mark.skipif
+
 
 def test_init_via_list_of_tuples():
     data = [
@@ -263,19 +279,19 @@ def test_append_index(a, b):
         {1: ["a", np.nan, "c"], 2: ["q", None, "u"]},
         pytest.param(
             {},
-            marks=pytest.mark.xfail(
+            marks=pytest_xfail(
                 reason="https://github.com/rapidsai/cudf/issues/11080"
             ),
         ),
         pytest.param(
             {1: [], 2: [], 3: []},
-            marks=pytest.mark.xfail(
+            marks=pytest_xfail(
                 reason="https://github.com/rapidsai/cudf/issues/11080"
             ),
         ),
         pytest.param(
             [1, 2, 3],
-            marks=pytest.mark.xfail(
+            marks=pytest_xfail(
                 reason="https://github.com/rapidsai/cudf/issues/11080"
             ),
         ),
@@ -2067,6 +2083,7 @@ def gdf(pdf):
     return cudf.DataFrame.from_pandas(pdf)
 
 
+@pytest_unmark_spilling
 @pytest.mark.parametrize(
     "data",
     [
@@ -2083,7 +2100,7 @@ def gdf(pdf):
         },
         pytest.param(
             {"x": [], "y": [], "z": []},
-            marks=pytest.mark.xfail(
+            marks=pytest_xfail(
                 condition=version.parse("11")
                 <= version.parse(cupy.__version__)
                 < version.parse("11.1"),
@@ -2093,7 +2110,7 @@ def gdf(pdf):
         ),
         pytest.param(
             {"x": []},
-            marks=pytest.mark.xfail(
+            marks=pytest_xfail(
                 condition=version.parse("11")
                 <= version.parse(cupy.__version__)
                 < version.parse("11.1"),
@@ -2214,6 +2231,7 @@ def _hide_host_other_warning(other):
         yield
 
 
+@pytest_unmark_spilling
 @pytest.mark.parametrize(
     "binop",
     [
@@ -2304,6 +2322,7 @@ def test_bitwise_binops_df(pdf, gdf, binop):
     assert_eq(d, g)
 
 
+@pytest_unmark_spilling
 @pytest.mark.parametrize(
     "binop",
     [
@@ -2609,7 +2628,7 @@ def test_dataframe_boolmask(mask_shape):
         [True, False, True],
         pytest.param(
             cudf.Series([True, False, True]),
-            marks=pytest.mark.xfail(
+            marks=pytest_xfail(
                 reason="Pandas can't index a multiindex with a Series"
             ),
         ),
@@ -2761,6 +2780,7 @@ def test_tail_for_string():
     assert_eq(gdf.tail(3), gdf.to_pandas().tail(3))
 
 
+@pytest_unmark_spilling
 @pytest.mark.parametrize("level", [None, 0, "l0", 1, ["l0", 1]])
 @pytest.mark.parametrize("drop", [True, False])
 @pytest.mark.parametrize(
@@ -2804,6 +2824,7 @@ def test_reset_index(level, drop, column_names, inplace, col_level, col_fill):
     assert_eq(expect, got)
 
 
+@pytest_unmark_spilling
 @pytest.mark.parametrize("level", [None, 0, 1, [None]])
 @pytest.mark.parametrize("drop", [False, True])
 @pytest.mark.parametrize("inplace", [False, True])
@@ -2964,7 +2985,7 @@ def test_set_index(data, index, drop, append, inplace):
 )
 @pytest.mark.parametrize("index", ["a", pd.Index([1, 1, 2, 2, 3])])
 @pytest.mark.parametrize("verify_integrity", [True])
-@pytest.mark.xfail
+@pytest_xfail
 def test_set_index_verify_integrity(data, index, verify_integrity):
     gdf = cudf.DataFrame(data)
     gdf.set_index(index, verify_integrity=verify_integrity)
@@ -3022,6 +3043,7 @@ def reindex_data_numeric():
     )
 
 
+@pytest_unmark_spilling
 @pytest.mark.parametrize("copy", [True, False])
 @pytest.mark.parametrize(
     "args,gd_kwargs",
@@ -3178,6 +3200,7 @@ def test_dataframe_empty_sort_index():
     assert_eq(expect, got, check_index_type=True)
 
 
+@pytest_unmark_spilling
 @pytest.mark.parametrize(
     "index",
     [
@@ -3192,7 +3215,7 @@ def test_dataframe_empty_sort_index():
         pytest.param(
             pd.RangeIndex(2, -1, -1),
             marks=[
-                pytest.mark.xfail(
+                pytest_xfail(
                     condition=PANDAS_LT_140,
                     reason="https://github.com/pandas-dev/pandas/issues/43591",
                 )
@@ -3235,6 +3258,7 @@ def test_dataframe_sort_index(
         assert_eq(expected, got, check_index_type=True)
 
 
+@pytest_unmark_spilling
 @pytest.mark.parametrize("axis", [0, 1, "index", "columns"])
 @pytest.mark.parametrize(
     "level",
@@ -3772,7 +3796,7 @@ def test_dataframe_round(decimals):
         pytest.param(
             [["a", True], ["b", False], ["c", False]],
             marks=[
-                pytest.mark.xfail(
+                pytest_xfail(
                     reason="NotImplementedError: all does not "
                     "support columns of object dtype."
                 )
@@ -3824,7 +3848,7 @@ def test_all(data):
         pytest.param(
             [["a", True], ["b", False], ["c", False]],
             marks=[
-                pytest.mark.xfail(
+                pytest_xfail(
                     reason="NotImplementedError: any does not "
                     "support columns of object dtype."
                 )
@@ -3872,6 +3896,7 @@ def test_empty_dataframe_any(axis):
     assert_eq(got, expected, check_index_type=False)
 
 
+@pytest_unmark_spilling
 @pytest.mark.parametrize("a", [[], ["123"]])
 @pytest.mark.parametrize("b", ["123", ["123"]])
 @pytest.mark.parametrize(
@@ -4319,11 +4344,11 @@ def test_series_values_host_property(data):
         [5.0, 7.0, 8.0],
         pytest.param(
             pd.Categorical(["a", "b", "c"]),
-            marks=pytest.mark.xfail(raises=NotImplementedError),
+            marks=pytest_xfail(raises=NotImplementedError),
         ),
         pytest.param(
             ["m", "a", "d", "v"],
-            marks=pytest.mark.xfail(raises=TypeError),
+            marks=pytest_xfail(raises=TypeError),
         ),
     ],
 )
@@ -4344,26 +4369,26 @@ def test_series_values_property(data):
         {"A": np.float32(np.arange(3)), "B": np.float64(np.arange(3))},
         pytest.param(
             {"A": [1, None, 3], "B": [1, 2, None]},
-            marks=pytest.mark.xfail(
+            marks=pytest_xfail(
                 reason="Nulls not supported by values accessor"
             ),
         ),
         pytest.param(
             {"A": [None, None, None], "B": [None, None, None]},
-            marks=pytest.mark.xfail(
+            marks=pytest_xfail(
                 reason="Nulls not supported by values accessor"
             ),
         ),
         {"A": [], "B": []},
         pytest.param(
             {"A": [1, 2, 3], "B": ["a", "b", "c"]},
-            marks=pytest.mark.xfail(
+            marks=pytest_xfail(
                 reason="str or categorical not supported by values accessor"
             ),
         ),
         pytest.param(
             {"A": pd.Categorical(["a", "b", "c"]), "B": ["d", "e", "f"]},
-            marks=pytest.mark.xfail(
+            marks=pytest_xfail(
                 reason="str or categorical not supported by values accessor"
             ),
         ),
@@ -4699,9 +4724,9 @@ def test_empty_df_astype(dtype, args):
     "errors",
     [
         pytest.param(
-            "raise", marks=pytest.mark.xfail(reason="should raise error here")
+            "raise", marks=pytest_xfail(reason="should raise error here")
         ),
-        pytest.param("other", marks=pytest.mark.xfail(raises=ValueError)),
+        pytest.param("other", marks=pytest_xfail(raises=ValueError)),
         "ignore",
     ],
 )
@@ -4734,6 +4759,7 @@ def test_df_constructor_dtype(dtype):
     assert_eq(expect, got)
 
 
+@pytest_unmark_spilling
 @pytest.mark.parametrize(
     "data",
     [
@@ -5188,7 +5214,7 @@ def test_cov():
     assert_eq(pdf.cov(), gdf.cov())
 
 
-@pytest.mark.xfail(reason="cupy-based cov does not support nulls")
+@pytest_xfail(reason="cupy-based cov does not support nulls")
 def test_cov_nans():
     pdf = pd.DataFrame()
     pdf["a"] = [None, None, None, 2.00758632, None]
@@ -5200,6 +5226,7 @@ def test_cov_nans():
     assert_eq(pdf.cov(), gdf.cov())
 
 
+@pytest_unmark_spilling
 @pytest.mark.parametrize(
     "gsr",
     [
@@ -5210,7 +5237,7 @@ def test_cov_nans():
         cudf.Series([4, 2, 3], index=cudf.core.index.RangeIndex(0, 3)),
         pytest.param(
             cudf.Series([4, 2, 3, 4, 5], index=["a", "b", "d", "0", "12"]),
-            marks=pytest.mark.xfail,
+            marks=pytest_xfail,
         ),
     ],
 )
@@ -5252,6 +5279,7 @@ def test_df_sr_binop(gsr, colnames, op):
     assert_eq(expect, got, check_dtype=False)
 
 
+@pytest_unmark_spilling
 @pytest.mark.parametrize(
     "op",
     [
@@ -5263,12 +5291,12 @@ def test_df_sr_binop(gsr, colnames, op):
         operator.pow,
         # comparison ops will temporarily XFAIL
         # see PR  https://github.com/rapidsai/cudf/pull/7491
-        pytest.param(operator.eq, marks=pytest.mark.xfail()),
-        pytest.param(operator.lt, marks=pytest.mark.xfail()),
-        pytest.param(operator.le, marks=pytest.mark.xfail()),
-        pytest.param(operator.gt, marks=pytest.mark.xfail()),
-        pytest.param(operator.ge, marks=pytest.mark.xfail()),
-        pytest.param(operator.ne, marks=pytest.mark.xfail()),
+        pytest.param(operator.eq, marks=pytest_xfail()),
+        pytest.param(operator.lt, marks=pytest_xfail()),
+        pytest.param(operator.le, marks=pytest_xfail()),
+        pytest.param(operator.gt, marks=pytest_xfail()),
+        pytest.param(operator.ge, marks=pytest_xfail()),
+        pytest.param(operator.ne, marks=pytest_xfail()),
     ],
 )
 @pytest.mark.parametrize(
@@ -5330,7 +5358,7 @@ def test_memory_usage(deep, index, set_index):
         )
 
 
-@pytest.mark.xfail
+@pytest_xfail
 def test_memory_usage_string():
     rows = int(100)
     df = pd.DataFrame(
@@ -6249,6 +6277,7 @@ def test_dataframe_init_from_arrays_cols(data, cols, index):
         assert_eq(pdf, gdf, check_dtype=False)
 
 
+@pytest_unmark_spilling
 @pytest.mark.parametrize(
     "col_data",
     [
@@ -6292,6 +6321,7 @@ def test_dataframe_assign_scalar(col_data, assign_val):
     assert_eq(pdf, gdf)
 
 
+@pytest_unmark_spilling
 @pytest.mark.parametrize(
     "col_data",
     [
@@ -6587,6 +6617,7 @@ def test_dataframe_info_null_counts():
     assert str_cmp == actual_string
 
 
+@pytest_unmark_spilling
 @pytest.mark.parametrize(
     "data1",
     [
@@ -7064,6 +7095,7 @@ def test_series_keys(ps):
         assert_eq(ps.keys(), gds.keys())
 
 
+@pytest_unmark_spilling
 @pytest.mark.parametrize(
     "df",
     [
@@ -7144,6 +7176,7 @@ def test_dataframe_append_dataframe(df, other, sort, ignore_index):
         assert_eq(expected, actual, check_index_type=not gdf.empty)
 
 
+@pytest_unmark_spilling
 @pytest.mark.parametrize(
     "df",
     [
@@ -7174,7 +7207,7 @@ def test_dataframe_append_dataframe(df, other, sort, ignore_index):
         pd.Series([10, 11, 23, 234, 13]),
         pytest.param(
             pd.Series([10, 11, 23, 234, 13], index=[11, 12, 13, 44, 33]),
-            marks=pytest.mark.xfail(
+            marks=pytest_xfail(
                 reason="pandas bug: "
                 "https://github.com/pandas-dev/pandas/issues/35092"
             ),
@@ -7227,6 +7260,7 @@ def test_dataframe_append_series_mixed_index():
         df.append(sr, ignore_index=True)
 
 
+@pytest_unmark_spilling
 @pytest.mark.parametrize(
     "df",
     [
@@ -7396,6 +7430,7 @@ def test_dataframe_ffill(df):
     assert_eq(expected, actual)
 
 
+@pytest_unmark_spilling
 @pytest.mark.parametrize(
     "df",
     [
@@ -7746,6 +7781,7 @@ def test_dataframe_init_with_columns(data, columns):
     )
 
 
+@pytest_unmark_spilling
 @pytest.mark.parametrize(
     "data, ignore_dtype",
     [
@@ -7825,6 +7861,7 @@ def test_dataframe_init_from_series_list(data, ignore_dtype, columns):
         assert_eq(expected, actual, check_index_type=True)
 
 
+@pytest_unmark_spilling
 @pytest.mark.parametrize(
     "data, ignore_dtype, index",
     [
@@ -7995,6 +8032,7 @@ def test_dataframe_iterrows_itertuples():
         df.iterrows()
 
 
+@pytest_unmark_spilling
 @pytest.mark.parametrize(
     "df",
     [
@@ -8027,7 +8065,7 @@ def test_dataframe_iterrows_itertuples():
                     ),
                 }
             ),
-            marks=pytest.mark.xfail(
+            marks=pytest_xfail(
                 reason="https://github.com/rapidsai/cudf/issues/6219"
             ),
         ),
@@ -8048,7 +8086,7 @@ def test_dataframe_iterrows_itertuples():
                     ),
                 }
             ),
-            marks=pytest.mark.xfail(
+            marks=pytest_xfail(
                 reason="https://github.com/rapidsai/cudf/issues/6219"
             ),
         ),
@@ -8072,6 +8110,7 @@ def test_describe_misc_include(df, include):
     assert_eq(expected, actual)
 
 
+@pytest_unmark_spilling
 @pytest.mark.parametrize(
     "df",
     [
@@ -8104,7 +8143,7 @@ def test_describe_misc_include(df, include):
                     ),
                 }
             ),
-            marks=pytest.mark.xfail(
+            marks=pytest_xfail(
                 reason="https://github.com/rapidsai/cudf/issues/6219"
             ),
         ),
@@ -8125,7 +8164,7 @@ def test_describe_misc_include(df, include):
                     ),
                 }
             ),
-            marks=pytest.mark.xfail(
+            marks=pytest_xfail(
                 reason="https://github.com/rapidsai/cudf/issues/6219"
             ),
         ),
@@ -8554,6 +8593,7 @@ def test_dataframe_constructor_column_index_only():
     ) == id(gdf["c"]._column)
 
 
+@pytest_unmark_spilling
 @pytest.mark.parametrize(
     "data",
     [
@@ -8655,6 +8695,7 @@ def test_agg_for_dataframe_with_string_columns(aggs):
         gdf.agg(aggs)
 
 
+@pytest_unmark_spilling
 @pytest.mark.parametrize(
     "join",
     ["left"],
@@ -8882,7 +8923,7 @@ def test_rename_for_level_RangeIndex_dataframe():
     assert_eq(expect, got)
 
 
-@pytest.mark.xfail(reason="level=None not implemented yet")
+@pytest_xfail(reason="level=None not implemented yet")
 def test_rename_for_level_is_None_MC():
     gdf = cudf.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
     gdf.columns = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1)])
@@ -9315,7 +9356,7 @@ def test_groupby_cov_positive_semidefinite_matrix():
     )
 
 
-@pytest.mark.xfail
+@pytest_xfail
 def test_groupby_cov_for_pandas_bug_case():
     # Handles case: pandas bug using ddof with missing data.
     # Filed an issue in Pandas on GH, link below:
@@ -9469,6 +9510,7 @@ def test_dataframe_rename_duplicate_column():
         gdf.rename(columns={"a": "b"}, inplace=True)
 
 
+@pytest_unmark_spilling
 @pytest.mark.parametrize(
     "data",
     [
@@ -9711,14 +9753,14 @@ def test_multiindex_wildcard_selection_all(wildcard_df):
     assert_eq(expect, got)
 
 
-@pytest.mark.xfail(reason="Not yet properly supported.")
+@pytest_xfail(reason="Not yet properly supported.")
 def test_multiindex_wildcard_selection_partial(wildcard_df):
     expect = wildcard_df.to_pandas().loc[:, (slice("a", "b"), "b")]
     got = wildcard_df.loc[:, (slice("a", "b"), "b")]
     assert_eq(expect, got)
 
 
-@pytest.mark.xfail(reason="Not yet properly supported.")
+@pytest_xfail(reason="Not yet properly supported.")
 def test_multiindex_wildcard_selection_three_level_all():
     midx = cudf.MultiIndex.from_tuples(
         [(c1, c2, c3) for c1 in "abcd" for c2 in "abc" for c3 in "ab"]
diff --git a/python/cudf/cudf/tests/test_reshape.py b/python/cudf/cudf/tests/test_reshape.py
index 280b619c305..6336565af52 100644
--- a/python/cudf/cudf/tests/test_reshape.py
+++ b/python/cudf/cudf/tests/test_reshape.py
@@ -9,6 +9,7 @@
 import cudf
 from cudf import melt as cudf_melt
 from cudf.core._compat import PANDAS_GE_120
+from cudf.core.buffer.spill_manager import get_global_manager
 from cudf.testing._utils import (
     ALL_TYPES,
     DATETIME_TYPES,
@@ -16,8 +17,18 @@
     assert_eq,
 )
 
+pytest_xfail = pytest.mark.xfail
 pytestmark = pytest.mark.spilling
 
+# If spilling is enabled globally, we skip many test permutations
+# to reduce running time.
+if get_global_manager() is not None:
+    ALL_TYPES = ["float32"]  # noqa: F811
+    DATETIME_TYPES = ["datetime64[ms]"]  # noqa: F811
+    NUMERIC_TYPES = ["float32"]  # noqa: F811
+    # To save time, we skip tests marked "pytest.mark.xfail"
+    pytest_xfail = pytest.mark.skipif
+
 
 @pytest.mark.parametrize("num_id_vars", [0, 1, 2])
 @pytest.mark.parametrize("num_value_vars", [0, 1, 2])
@@ -80,7 +91,7 @@ def test_melt(nulls, num_id_vars, num_value_vars, num_rows, dtype):
     + [
         pytest.param(
             "str",
-            marks=pytest.mark.xfail(
+            marks=pytest_xfail(
                 condition=not PANDAS_GE_120, reason="pandas bug"
             ),
         )
@@ -443,7 +454,7 @@ def test_pivot_values(values):
         0,
         pytest.param(
             1,
-            marks=pytest.mark.xfail(
+            marks=pytest_xfail(
                 reason="Categorical column indexes not supported"
             ),
         ),
@@ -451,7 +462,7 @@ def test_pivot_values(values):
         "foo",
         pytest.param(
             "bar",
-            marks=pytest.mark.xfail(
+            marks=pytest_xfail(
                 reason="Categorical column indexes not supported"
             ),
         ),
@@ -459,24 +470,24 @@ def test_pivot_values(values):
         [],
         pytest.param(
             [0, 1],
-            marks=pytest.mark.xfail(
+            marks=pytest_xfail(
                 reason="Categorical column indexes not supported"
             ),
         ),
         ["foo"],
         pytest.param(
             ["foo", "bar"],
-            marks=pytest.mark.xfail(
+            marks=pytest_xfail(
                 reason="Categorical column indexes not supported"
             ),
         ),
         pytest.param(
             [0, 1, 2],
-            marks=pytest.mark.xfail(reason="Pandas behaviour unclear"),
+            marks=pytest_xfail(reason="Pandas behaviour unclear"),
         ),
         pytest.param(
             ["foo", "bar", "baz"],
-            marks=pytest.mark.xfail(reason="Pandas behaviour unclear"),
+            marks=pytest_xfail(reason="Pandas behaviour unclear"),
         ),
     ],
 )
@@ -508,7 +519,7 @@ def test_unstack_multiindex(level):
         pd.Index(range(0, 5), name="row_index"),
         pytest.param(
             pd.CategoricalIndex(["d", "e", "f", "g", "h"]),
-            marks=pytest.mark.xfail(
+            marks=pytest_xfail(
                 reason="Categorical column indexes not supported"
             ),
         ),

From 782fba31cd5f040d08118a8a988b20a45b9c5b01 Mon Sep 17 00:00:00 2001
From: Nghia Truong <nghiatruong.vn@gmail.com>
Date: Fri, 18 Nov 2022 11:13:20 -0800
Subject: [PATCH 192/202] Implement JNI for chunked Parquet reader (#11961)

This adds JNI for chunked Parquet reader. It depends on the chunked Parquet reader implementation PR  (https://github.com/rapidsai/cudf/pull/11867).

Authors:
   - https://github.com/nvdbaranec
   - Nghia Truong (https://github.com/ttnghia)

Approvers:
   - MithunR (https://github.com/mythrocks)
   - Robert (Bobby) Evans (https://github.com/revans2)
---
 .../ai/rapids/cudf/ParquetChunkedReader.java  | 155 ++++++++++++++++++
 java/src/main/native/CMakeLists.txt           |   1 +
 java/src/main/native/src/ChunkedReaderJni.cpp | 124 ++++++++++++++
 .../test/java/ai/rapids/cudf/TableTest.java   |  18 ++
 java/src/test/resources/splittable.parquet    | Bin 0 -> 320341 bytes
 5 files changed, 298 insertions(+)
 create mode 100644 java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java
 create mode 100644 java/src/main/native/src/ChunkedReaderJni.cpp
 create mode 100644 java/src/test/resources/splittable.parquet

diff --git a/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java b/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java
new file mode 100644
index 00000000000..c34336ac73f
--- /dev/null
+++ b/java/src/main/java/ai/rapids/cudf/ParquetChunkedReader.java
@@ -0,0 +1,155 @@
+/*
+ *
+ *  Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ *
+ */
+
+package ai.rapids.cudf;
+
+import java.io.File;
+
+/**
+ * Provide an interface for reading a Parquet file in an iterative manner.
+ */
+public class ParquetChunkedReader implements AutoCloseable {
+  static {
+    NativeDepsLoader.loadNativeDeps();
+  }
+
+  /**
+   * Construct the reader instance from a read limit and a file path.
+   *
+   * @param chunkSizeByteLimit Limit on total number of bytes to be returned per read,
+   *                           or 0 if there is no limit.
+   * @param filePath Full path of the input Parquet file to read.
+   */
+  public ParquetChunkedReader(long chunkSizeByteLimit, File filePath) {
+    this(chunkSizeByteLimit, ParquetOptions.DEFAULT, filePath);
+  }
+
+  /**
+   * Construct the reader instance from a read limit, a ParquetOptions object, and a file path.
+   *
+   * @param chunkSizeByteLimit Limit on total number of bytes to be returned per read,
+   *                           or 0 if there is no limit.
+   * @param opts The options for Parquet reading.
+   * @param filePath Full path of the input Parquet file to read.
+   */
+  public ParquetChunkedReader(long chunkSizeByteLimit, ParquetOptions opts, File filePath) {
+    handle = create(chunkSizeByteLimit, opts.getIncludeColumnNames(), opts.getReadBinaryAsString(),
+        filePath.getAbsolutePath(), 0, 0, opts.timeUnit().typeId.getNativeId());
+
+    if(handle == 0) {
+      throw new IllegalStateException("Cannot create native chunked Parquet reader object.");
+    }
+  }
+
+  /**
+   * Construct the reader instance from a read limit and a file already read in a memory buffer.
+   *
+   * @param chunkSizeByteLimit Limit on total number of bytes to be returned per read,
+   *                           or 0 if there is no limit.
+   * @param opts The options for Parquet reading.
+   * @param buffer Raw Parquet file content.
+   * @param offset The starting offset into buffer.
+   * @param len The number of bytes to parse from the given buffer.
+   */
+  public ParquetChunkedReader(long chunkSizeByteLimit, ParquetOptions opts, HostMemoryBuffer buffer,
+      long offset, long len) {
+    handle = create(chunkSizeByteLimit, opts.getIncludeColumnNames(), opts.getReadBinaryAsString(), null,
+        buffer.getAddress() + offset, len, opts.timeUnit().typeId.getNativeId());
+
+    if(handle == 0) {
+      throw new IllegalStateException("Cannot create native chunked Parquet reader object.");
+    }
+  }
+
+  /**
+   * Check if the given file has anything left to read.
+   *
+   * @return A boolean value indicating if there is more data to read from file.
+   */
+  public boolean hasNext() {
+    if(handle == 0) {
+      throw new IllegalStateException("Native chunked Parquet reader object may have been closed.");
+    }
+
+    if (firstCall) {
+      // This function needs to return true at least once, so that an empty table
+      // (one with empty columns rather than no columns) can be returned by readChunk()
+      // if the input file has no rows.
+      firstCall = false;
+      return true;
+    }
+    return hasNext(handle);
+  }
+
+  /**
+   * Read a chunk of rows from the given Parquet file such that the total size of the returned data
+   * does not exceed the given read limit. If the given file has no data, or all data has already
+   * been read by previous calls to this function, a null Table will be returned.
+   *
+   * @return A table of new rows read from the given file.
+   */
+  public Table readChunk() {
+    if(handle == 0) {
+      throw new IllegalStateException("Native chunked Parquet reader object may have been closed.");
+    }
+
+    long[] columnPtrs = readChunk(handle);
+    return columnPtrs != null ? new Table(columnPtrs) : null;
+  }
+
+  @Override
+  public void close() {
+    if (handle != 0) {
+      close(handle);
+      handle = 0;
+    }
+  }
+
+
+  /**
+   * Auxiliary variable that lets {@link #hasNext()} return true at least once.
+   */
+  private boolean firstCall = true;
+
+  /**
+   * Handle for memory address of the native Parquet chunked reader class.
+   */
+  private long handle;
+
+
+  /**
+   * Create a native chunked Parquet reader object on heap and return its memory address.
+   *
+   * @param chunkSizeByteLimit Limit on total number of bytes to be returned per read,
+   *                           or 0 if there is no limit.
+   * @param filterColumnNames Name of the columns to read, or an empty array if we want to read all.
+   * @param binaryToString Whether to convert the corresponding column to String if it is binary.
+   * @param filePath Full path of the file to read, or given as null if reading from a buffer.
+   * @param bufferAddrs The address of a buffer to read from, or 0 if we are not using that buffer.
+   * @param length The length of the buffer to read from.
+   * @param timeUnit Return type of time unit for timestamps.
+   */
+  private static native long create(long chunkSizeByteLimit, String[] filterColumnNames,
+      boolean[] binaryToString, String filePath, long bufferAddrs, long length, int timeUnit);
+
+  private static native boolean hasNext(long handle);
+
+  private static native long[] readChunk(long handle);
+
+  private static native void close(long handle);
+}
diff --git a/java/src/main/native/CMakeLists.txt b/java/src/main/native/CMakeLists.txt
index 339f0f439a0..ac05b16b39a 100755
--- a/java/src/main/native/CMakeLists.txt
+++ b/java/src/main/native/CMakeLists.txt
@@ -130,6 +130,7 @@ add_library(
   cudfjni
   src/Aggregation128UtilsJni.cpp
   src/AggregationJni.cpp
+  src/ChunkedReaderJni.cpp
   src/CudfJni.cpp
   src/CudaJni.cpp
   src/ColumnVectorJni.cpp
diff --git a/java/src/main/native/src/ChunkedReaderJni.cpp b/java/src/main/native/src/ChunkedReaderJni.cpp
new file mode 100644
index 00000000000..553ec46d569
--- /dev/null
+++ b/java/src/main/native/src/ChunkedReaderJni.cpp
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2022, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <memory>
+#include <vector>
+
+#include <cudf/column/column.hpp>
+#include <cudf/io/parquet.hpp>
+#include <cudf/table/table.hpp>
+
+#include "cudf_jni_apis.hpp"
+#include "jni_utils.hpp"
+
+// This function is defined in `TableJni.cpp`.
+jlongArray
+cudf::jni::convert_table_for_return(JNIEnv *env, std::unique_ptr<cudf::table> &&table_result,
+                                    std::vector<std::unique_ptr<cudf::column>> &&extra_columns);
+
+// This file is for the code related to chunked readers (Parquet, ORC, etc.).
+
+extern "C" {
+
+// This function should take all the parameters that `Table.readParquet` takes,
+// plus one more parameter `long chunkSizeByteLimit`.
+JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_ParquetChunkedReader_create(
+    JNIEnv *env, jclass, jlong chunk_read_limit, jobjectArray filter_col_names,
+    jbooleanArray j_col_binary_read, jstring inp_file_path, jlong buffer, jlong buffer_length,
+    jint unit) {
+  JNI_NULL_CHECK(env, j_col_binary_read, "Null col_binary_read", 0);
+  bool read_buffer = true;
+  if (buffer == 0) {
+    JNI_NULL_CHECK(env, inp_file_path, "Input file or buffer must be supplied", 0);
+    read_buffer = false;
+  } else if (inp_file_path != nullptr) {
+    JNI_THROW_NEW(env, "java/lang/IllegalArgumentException",
+                  "Cannot pass in both a buffer and an inp_file_path", 0);
+  } else if (buffer_length <= 0) {
+    JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "An empty buffer is not supported", 0);
+  }
+
+  try {
+    cudf::jni::auto_set_device(env);
+    cudf::jni::native_jstring filename(env, inp_file_path);
+    if (!read_buffer && filename.is_empty()) {
+      JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "inp_file_path cannot be empty", 0);
+    }
+
+    cudf::jni::native_jstringArray n_filter_col_names(env, filter_col_names);
+
+    // TODO: This variable is unused now, but we still don't know what to do with it yet.
+    // As such, it needs to stay here for a little more time before we decide to use it again,
+    // or remove it completely.
+    cudf::jni::native_jbooleanArray n_col_binary_read(env, j_col_binary_read);
+    (void)n_col_binary_read;
+
+    auto const source = read_buffer ?
+                            cudf::io::source_info(reinterpret_cast<char *>(buffer),
+                                                  static_cast<std::size_t>(buffer_length)) :
+                            cudf::io::source_info(filename.get());
+
+    auto opts_builder = cudf::io::parquet_reader_options::builder(source);
+    if (n_filter_col_names.size() > 0) {
+      opts_builder = opts_builder.columns(n_filter_col_names.as_cpp_vector());
+    }
+    auto const read_opts = opts_builder.convert_strings_to_categories(false)
+                               .timestamp_type(cudf::data_type(static_cast<cudf::type_id>(unit)))
+                               .build();
+
+    return reinterpret_cast<jlong>(new cudf::io::chunked_parquet_reader(
+        static_cast<std::size_t>(chunk_read_limit), read_opts));
+  }
+  CATCH_STD(env, 0);
+}
+
+JNIEXPORT jboolean JNICALL Java_ai_rapids_cudf_ParquetChunkedReader_hasNext(JNIEnv *env, jclass,
+                                                                            jlong handle) {
+  JNI_NULL_CHECK(env, handle, "handle is null", false);
+
+  try {
+    cudf::jni::auto_set_device(env);
+    auto const reader_ptr = reinterpret_cast<cudf::io::chunked_parquet_reader *const>(handle);
+    return reader_ptr->has_next();
+  }
+  CATCH_STD(env, false);
+}
+
+JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_ParquetChunkedReader_readChunk(JNIEnv *env, jclass,
+                                                                                jlong handle) {
+  JNI_NULL_CHECK(env, handle, "handle is null", 0);
+
+  try {
+    cudf::jni::auto_set_device(env);
+    auto const reader_ptr = reinterpret_cast<cudf::io::chunked_parquet_reader *const>(handle);
+    auto chunk = reader_ptr->read_chunk();
+    return chunk.tbl ? cudf::jni::convert_table_for_return(env, chunk.tbl) : nullptr;
+  }
+  CATCH_STD(env, 0);
+}
+
+JNIEXPORT void JNICALL Java_ai_rapids_cudf_ParquetChunkedReader_close(JNIEnv *env, jclass,
+                                                                      jlong handle) {
+  JNI_NULL_CHECK(env, handle, "handle is null", );
+
+  try {
+    cudf::jni::auto_set_device(env);
+    delete reinterpret_cast<cudf::io::chunked_parquet_reader *>(handle);
+  }
+  CATCH_STD(env, );
+}
+
+} // extern "C"
diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java
index 2a33c37a8d6..bf951a871e7 100644
--- a/java/src/test/java/ai/rapids/cudf/TableTest.java
+++ b/java/src/test/java/ai/rapids/cudf/TableTest.java
@@ -75,6 +75,7 @@
 
 public class TableTest extends CudfTestBase {
   private static final File TEST_PARQUET_FILE = TestUtils.getResourceAsFile("acq.parquet");
+  private static final File TEST_PARQUET_FILE_CHUNKED_READ = TestUtils.getResourceAsFile("splittable.parquet");
   private static final File TEST_PARQUET_FILE_BINARY = TestUtils.getResourceAsFile("binary.parquet");
   private static final File TEST_ORC_FILE = TestUtils.getResourceAsFile("TestOrcFile.orc");
   private static final File TEST_ORC_TIMESTAMP_DATE_FILE = TestUtils.getResourceAsFile("timestamp-date-test.orc");
@@ -725,6 +726,23 @@ void testReadParquetContainsDecimalData() {
     }
   }
 
+  @Test
+  void testChunkedReadParquet() {
+    // Drain the chunked reader and tally how many chunks and rows it hands back.
+    try (ParquetChunkedReader reader =
+             new ParquetChunkedReader(240000, TEST_PARQUET_FILE_CHUNKED_READ)) {
+      int chunkCount = 0;
+      long rowCount = 0;
+      for (; reader.hasNext(); ++chunkCount) {
+        try (Table chunk = reader.readChunk()) {
+          rowCount += chunk.getRowCount();
+        }
+      }
+      assertEquals(2, chunkCount);
+      assertEquals(40000, rowCount);
+    }
+  }
+
   @Test
   void testReadAvro() {
     AvroOptions opts = AvroOptions.builder()
diff --git a/java/src/test/resources/splittable.parquet b/java/src/test/resources/splittable.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..0f110ee10007546ce26b40c93da46cf527db10f7
GIT binary patch
literal 320341
zcmW*UV+`O7vj))bUE8*8+qP}nwr$(CZQHhOd+*uX^fYPud(tMId8S<c3h5*NFLJ*J
z!T(7WdEd4GZi7S)^1uH-{&NjLKmrk%e~*F^jNpVIB%ugR7{U^c@I)XYk%&wbq7seh
z#2_ZIh)o>g5|8*KAR&oJOcIikjO3&sC8<bF8q$)E^kg6-naE5QvXYJL<RB-x$W0#d
zl8^ippdf`POc9DwjN+7_B&8@#8Ol<Q@>HNAm8eV=s#1;W)SxD{s7)Q}QjhvHpdpQD
zOcR>YjOMhUC9P;p8`{#2_H>{lo#;##y3&pA^q?ob=uIE`(vSWOU?77S%n*h$jNy!6
zB%>J37{)S=@l0SMlbFmDrZSD`%wQ(7n9UsKGLQKzU?GcG%o3KejODCgC97D?8rHIo
z^=x1xo7l`2wz7@w>|iIm*v%gHvXA{7;2?)M%n^=qjN_c(B&Rsd8P0N!^IYH}m$=Lo
zu5yj*+~6j+xXm5za*z8w;31EA%oCpSjOV=IC9inR8{YDc_k7?ZpZLrdzVeOl{NN|Q
z_{|^w^1uI_1Rx-R2uu)y5{%%4AS9s(O&G!wj_^bvB9Vwp6rvK1=)@o<v4~9^;u4Se
zBp@M)NK6uvl8oe}ASJ0tO&ZdYj`U<8Bbmrd7P69!?BpOPxyVf(@{*7I6rdo5C`=KG
zQjFr1pd_UzO&Q8kj`CEXB9*926{=E=>eQenwWv)U>QayTG@v1kXiO8D(v0S`pe3zn
zO&i+Mj`nn*Bc13>7rN4o?)0E1z35FJ`qGd73}7IG7|alcGK}GjU?ig$%^1cqj`2)j
zB9oZR6s9tb>C9jzvzW~s<}#1@EMOsvSj-ZZvW(@dU?r<q%^KFSj`eI{Bb(UF7PhjD
z?d)JDyV%Vh_Og%t9N-{_ILr}_a*X4g;3TIw%^A*ej`LjLBA2+#6|QoP>)hZbx46w6
z?sAX&Jm4XZc+3-?@{H%a;3cnk%^TkGj`w`vBcJ%p7rye1@BH8=zxd4`{_^j|{~rMe
zNFV|egrEc?I3Wm0C_)p4u!JK#5r{}6A`^wEL?b#eh)FDB6Nk9OBR&a8NFoxGgrp=R
zIVng<DpHe%w4@_F8OTT`GLwa@WFtE{$Vo18lZU+IBR>TwNFfSSgrXFqI3*}aDN0j@
zvXrAd6{tuhDpQ53RHHgIs7WnqQ-`|LqdpC2NFy54gr+p3IW1^OD_YZrwzQ)?9q33W
zI@5)&bfY^x=t(bn(}%wFqdx-}$RGwYgrN*$I3pOzC`L1ev5aFp6PU;(CNqVpOk+AT
zn8_?=Gl#j%V?GO5$RZZAgrzKFIV)JnDps?GwX9=38`#JuHnWATY-2k+*vT$-vxmLx
zV?PHt$RQ4MgrgkeI43yCDNb{Svz+5R7r4kJE^~#eT;n=7xXCSUbBDX!<30~~$Ri%}
zgr_{?IWKt0D_--4x4h#$ANa^8KJ$gIeB(Pm_{lGR^M}9u`{@6V00blufeAuTf)Sh$
zgd`N92}4-I5uOM{BodK{LR6v=ofyO<7O{y#T;dU*1SBL8iAh3Il98Mgq$CxoNkdxF
zk)8}>Bomp*LRPYoogCyO7rDtpUh<Kj0u-bWg(*T&icy>rl%y1;DMMMxQJxA^q!N{>
zLRG3!of_1n7PYBEUFuPv1~jA*jcGztn$esVw4@cSX+vAu(Vh-;q!XR#LRY%cogVb0
z7rp62U;5FX0SsgigBik5hB2HGjARs}8N*n{F`fxbWD=8^!c?X)of*tz7PFbdT;?&K
z1uSF{i&?@_ma&`_tYj6dS;Jb^v7QZVWD}d&!dAAiogM6C7rWWRUiPt{103WKhdIJg
zj&Yn5oa7XzIm21bah?lY<Pw*;!d0$uog3Wb7Pq;>UG8z82R!5vk9opVp7ER)yyO+H
zdBa=Y@tzNS<P)Fy!dJfWoge(<7r*(#U;Y<J{S$zI1R^j&2ud)56M~S0A~azLOE|(4
zfrvyRGEs<1G@=uOn8YGBafnMi;*)@cBqA|MNJ=u2lY*3_A~k79OFGh%fsAA#Gg-(=
zHnNk0oa7=mdB{sX@>76<6rwOiC`vJkQ-YF|qBLbFOF7C@fr?b3GF7NbHL6pCn$)5;
zb*M`{>eGORG@>z0Xi77h(}I??qBU)3OFP=rfsS;dGhOIPH@ee<p7f$OedtR+`ZIum
z3}P@t7|Jk)GlG$fVl-nI%Q(g}fr(6FGE<n!G^R6ynapA~bC}CK=Cgo>EMhTBSjsY%
zvx1eZVl``6%R1JxfsJfpGh5ioHny{ao$O*ad)Ui9_H%%P9O5uXILa}ObApqc;xuPC
z%Q?<-fs0(?GFQ0DHLi1mo800yceu+v?(=|$JmN7=c*--L^MaSW;x%u0%RAolfscIR
zGhg`1H@@?OpZwxCfB4J)0;_)l5RgCwCI~?ZMsPw9l2C*u3}FdJcp?yyNJJ(IQHe%$
zVi1#9#3l}LiAQ`AkdQ<qCJ9MNMsiY+l2oK74QWY7dNPoaOk^etS;<Cra*&f;<R%Y!
z$wz()P>@0trU*qTMsZ3|l2VkW3}q=tc`8tmN>ru_RjEdGYEY9})TRz~sYiVp(2zzn
zrU^}HMsr%wl2){)4Q**hdpgjOPIRUVUFk-5deD<z^rjDe=|_JCFpxnEW(Y$W#&AY3
zl2MFi3}YF`cqTBBNla!6Q<=teW-yak%w`UAna6w<u#iP8W(iAK#&TA$l2xo`4QpA)
zdN#0;O>AZhTiM2TcCeFO>}C&p*~fkkaF9bB<_JeQ#&J$?l2e@K3}-pVc`k5~OI+p(
zSGmS@Zg7)Z+~y8<xyOAT@Q_D5<_S-E#&cfql2^Ru4R3kJdp_`yPkiPJU-`y&e(;lD
z{N@jT`CkzAPXGcEh`<CPD8UF$2tpEy(1al@;RsIzA`*#z{}rMTm1smK1~G|6Y~m1?
zc*G|G2}wj^l8}^SBqs$aNkwYXkd}0$Cj%MDL}s#(m26}u2RX?_Zt{?qeB`G91t~;f
zicpkd6sH6wDMe|@P?mC(rveqJL}jW_m1<O{1~sWgZR${$deo-@4QWJUn$VPHG^YhE
zX+>+=(3W<zrvn}7L}$9tm2PyW2R-RUZ~D-ee)MMm0~y3%hA@<23}*x*8O3PEFqUzQ
zX95$M#AK#0m1#_81~Zw(Z00bRdCX@43t7Zsmavp%EN2BPS;cDBu$FbKX9FAA#Addz
zm2GTi2RqrtZuYR3eeCA|2RX!Hj&PJ?9OnclImKztaF%nN=K>eG#AU8<m1|t*1~<9I
zZSHWFd)(&%4|&96p74}sJm&>3dBtnq@RoPH=K~-4#Am+nm2Z6K2S546Z~pL?{{>b5
z1Rx-R2uu)y5{%%4AS9s(O&G!wj_^bvB9Vwp6rvK1=)@o<v4~9^;u4SeBp@M)NK6uv
zl8oe}ASJ0tO&ZdYj`U<8Bbmrd7P69!?BpOPxyVf(@{*7I6rdo5C`=KGQjFr1pd_Uz
zO&Q8kj`CEXB9*926{=E=>eQenwWv)U>QayTG@v1kXiO8D(v0S`pe3znO&i+Mj`nn*
zBc13>7rN4o?)0E1z35FJ`qGd73}7IG7|alcGK}GjU?ig$%^1cqj`2)jB9oZR6s9tb
z>C9jzvzW~s<}#1@EMOsvSj-ZZvW(@dU?r<q%^KFSj`eI{Bb(UF7PhjD?d)JDyV%Vh
z_Og%t9N-{_ILr}_a*X4g;3TIw%^A*ej`LjLBA2+#6|QoP>)hZbx46w6?sAX&Jm4XZ
zc+3-?@{H%a;3cnk%^TkGj`w`vBcJ%p7rye1@BH8=zxd4`{_^h!|9=D^Ab|)>5P}kn
z;DjI~p$JVF!V-?~L?9xOh)fis5{>A@ASSVhO&sD9kN6}YA&E##5|WaP<fI@asYp#4
z(vpt!WFRA%$V?Wpl8x--ASb!VO&;=+kNgy%AcZJQ5sFfb;*_8yr6^4q%2JN<RG=c2
zs7w{AQjO}=peD7bO&#h|kNPyAA&qEE6PnVD=Cq(Ct!Paf+R~2pbf6=h=u8*7(v9x)
zpeMcPO&|KwkNyl`AcGjp5QZ|0;f!D;qZrK?#xjoaOkg6Dn9LNWGL7lXU?#Je%^c=3
zkNGTMA&Xed5|*-z<*Z;Ot60q%*0PTEY+xgs*vuBTvW@NRU?;oS%^vo$kNq6rAcr{2
z5sq?<<DB3mr#Q_S&T@|PT;L*?xXcx<a*gZU;3l`Y%^mJ?kNZ5}A&+>>6Q1&n=e*!0
zuXxQH-tvz3eBdLW_{<l+@{RBO;3vQM%^&{ques+x0uYcu1SSYU2}W>25Ry=YCJbQ-
zM|dI-kw`=)3Q>thbYc*bSi~j{afwHK5|EHYBqj+-Nk(!~kdjoSCJkvxM|v`lkxXPJ
z3t7oVc5;xDT;wJXdC5n93Q&+j6s8D8DMoQhP?A!VrVM2%M|mnxkxEpi3RS5_b!t$P
zTGXZvb*V>v8qknNG^PnnX-0Ee(2`cPrVVXrM|(QZkxq1`3tj0(cY4s1Ui799ed$Mk
z1~8C83}y&J8OCr%Fp^P>W(;E)$9N_%kx5Ku3R9WJbY?J<S<Ge*bD76{7O;>-EM^Hy
zS;lf!u##1*W({ju$9gufkxgu73tQR7c6P9nUF>ELd)dc+4seh|9Oei|ImU5LaFSD;
z<_u>!$9XPrkxN|W3Rk(tb#8EzTioUjce%%X9`KMyJmv{cdB$^I@RC=&<_&Lo$9q2T
zkxzW)3t#!hcYg4bU;O3|fBDzg_a6ZWNFV|egrEc?I3Wm0C_)p4u!JK#5r{}6A`^wE
zL?b#eh)FDB6Nk9OBR&a8NFoxGgrp=RIVng<DpHe%w4@_F8OTT`GLwa@WFtE{$Vo18
zlZU+IBR>TwNFfSSgrXFqI3*}aDN0j@vXrAd6{tuhDpQ53RHHgIs7WnqQ-`|LqdpC2
zNFy54gr+p3IW1^OD_YZrwzQ)?9q33WI@5)&bfY^x=t(bn(}%wFqdx-}$RGwYgrN*$
zI3pOzC`L1ev5aFp6PU;(CNqVpOk+ATn8_?=Gl#j%V?GO5$RZZAgrzKFIV)JnDps?G
zwX9=38`#JuHnWATY-2k+*vT$-vxmLxV?PHt$RQ4MgrgkeI43yCDNb{Svz+5R7r4kJ
zE^~#eT;n=7xXCSUbBDX!<30~~$Ri%}gr_{?IWKt0D_--4x4h#$ANa^8KJ$gIeB(Pm
z_{lGR^M}9uYwG=v00blufeAuTf)Sh$gd`N92}4-I5uOM{BodK{LR6v=ofyO<7O{y#
zT;dU*1SBL8iAh3Il98Mgq$CxoNkdxFk)8}>Bomp*LRPYoogCyO7rDtpUh<Kj0u-bW
zg(*T&icy>rl%y1;DMMMxQJxA^q!N{>LRG3!of_1n7PYBEUFuPv1~jA*jcGztn$esV
zw4@cSX+vAu(Vh-;q!XR#LRY%cogVb07rp62U;5FX0SsgigBik5hB2HGjARs}8N*n{
zF`fxbWD=8^!c?X)of*tz7PFbdT;?&K1uSF{i&?@_ma&`_tYj6dS;Jb^v7QZVWD}d&
z!dAAiogM6C7rWWRUiPt{103WKhdIJgj&Yn5oa7XzIm21bah?lY<Pw*;!d0$uog3Wb
z7Pq;>UG8z82R!5vk9opVp7ER)yyO+HdBa=Y@tzNS<P)Fy!dJfWoge(<7r*(#U;Z`p
z|3?4<5{SSAAt=EJP6$F0iqM21Ea3=G1R@fN$V4G3(TGkAViJqk#33&6h))6%l8D44
zAt}j7P6|?ziqxbbE$K*41~QU~%w!=e*~m@~a*~VO<RLHl$WH+ZQi#G7p(w>DP6<j<
ziqe#!EafOq1u9aB%2c5$)u>JlYEp~Z)S)i*s80hL(ul@1p()L1P77Mniq^EDE$wJe
z2RhP;&UB$G-RMpadeV#D^r0{P=+6KKGKj$pVJO2G&Im>_iqVW=EaMo@1ST?x$xLA?
z)0oZ-W-^P}%waC`n9l+hvWUejVJXX4&I(qtiq))PE$dj%1~#&Z&1_*S+t|(ycCw4z
z>|rna*v|nDa)`qm;V8#A&IwL(iqo9oEay1S1uk-l%Ut0q*SO9NZgPv;+~F?wxX%L~
z@`%Sg;VI8}&I?}hir2j1E$?{G2R`zN&wSx4-}ufCe)5ao{NXSEhI#%Y009X^V1f{o
zU<4-wAqhoj!Vs2lgeL+Ki9}?g5S3^|Ck8QzMQq{_mw3b{0SQS&Vv>-QWF#jADM>|Y
z(vX&Pq$dLz$wX$dkd<s?CkHvnMQ-wtmwe=>00k*TVTw?cViczYB`HN|%21Yal&1m}
zsYGR}P?c&_rv^2tMQ!R(mwMEv0S#$HW17&EW;CY-Eont-+R&DEw5J0d=|pF`(3Ng<
zrw2XhMQ{4hmwxnT00SAsV1_W1VGL&kBN@eL#xRy~jAsH9nZ#tKFqLUcX9hEw#cbv<
zmwC)*0Sj5gVwSL!Wh`d}D_O;A*07d!tY-ro*~DhHu$66WX9qjk#cuYnmwoK#00%k5
zVUBQ=V;tuMCppDw&Ty7<oaX`;xx{6zaFuIZ=LR>q#cl3zmwVjj0S|e^W1jGoXFTTx
zFL}jl-td-pyypWS`NU_w@Re_T=LbLe#c%%bmw$tN{}F(I1R^j&2ud)56M~S0A~azL
zOE|(4frvyRGEs<1G@=uOn8YGBafnMi;*)@cBqA|MNJ=u2lY*3_A~k79OFGh%fsAA#
zGg-(=HnNk0oa7=mdB{sX@>76<6rwOiC`vJkQ-YF|qBLbFOF7C@fr?b3GF7NbHL6pC
zn$)5;b*M`{>eGORG@>z0Xi77h(}I??qBU)3OFP=rfsS;dGhOIPH@ee<p7f$OedtR+
z`ZIum3}P@t7|Jk)GlG$fVl-nI%Q(g}fr(6FGE<n!G^R6ynapA~bC}CK=Cgo>EMhTB
zSjsY%vx1eZVl``6%R1JxfsJfpGh5ioHny{ao$O*ad)Ui9_H%%P9O5uXILa}ObApqc
z;xuPC%Q?<-fs0(?GFQ0DHLi1mo800yceu+v?(=|$JmN7=c*--L^MaSW;x%u0%RAol
zfscIRGhg`1H@@?OpZwxCfB4J4A>RK8KtKW!m>>it7{LiaNJ0^sFoY!>;fX**A`zJ=
zL?s&0i9t+a5t}%~B_8ofKtd9cm?R`68OcdON>Y)UG^8aR>B&GwGLe}qWF;Hf$w5wX
zk()f^B_H`IKtT#om?9LV7{w_;NlH<gGL)qp<*7hLDp8p#RHYi#sX<L@QJXr{r5^QZ
zKtmeQm?ku(8O>=yOIp#IHngQ3?dd>AI?<Ufbfp{J=|N9=(VIT>r62tnz(58um>~>h
z7{eLCNJcT5F^pv#<C(xjCNY^QOl2C=nZZnEF`GHeWghccz(N+Wm?bP_8OvG0N>;I&
zHLPVF>)F6YHnEv4Y-JnU*}+bBv70^YWgq)Fz(Edim?IqJ7{@umNltN^Go0ld=efW|
zE^(PFT;&?qxxr0tahp5b<sSEWz(XGKm?u2t8P9paOJ4DsH@xK?@A<$-KJl3^eB~S8
z`N2<q@tZ&V<=+7Re*_>Pfe1_xf)b42gdilL2u&Em5{~dhAR>{7OcbILjp)Q6Cb5W3
z9O4p>_#_}9iAYQml9G(%q#z}!NKG2jl8*FbAS0Q`Oct_|jqKzgC%MQ?9`cfp{1l)d
zg(yrBic*Z?l%OP~C`}p4QjYRepdyv1Ockn9jq22(Cbg(d9qLk#`ZS;+jc800n$nEs
zw4f!eXiXd1(vJ3Ypd+2=Oc%P+jqdcIC%x!RANtad{tRFsgBZ*ZhBA!dj9?_A7|j^Q
zGLG>~U?P*4%oL_Fjp@u_CbO8$9Og2Q`7B@|i&)GOma>fHtY9UpSj`&NvX1p^U?ZE@
z%oet?jqU7UC%f3q9`>@2{T$#Rhd9g;j&h9SoZuv<IL#T(a*p#{;3Ai}%oVP3jqBXt
zCbziF9qw|E`#j(wk9f=zp7M<6yx=9Tc+DH$@{ad>;3J>-%oo1$jqm*6C%^d3AO7+$
zyyHIt5RgCwCI~?ZMsPw9l2C*u3}FdJcp?yyNJJ(IQHe%$Vi1#9#3l}LiAQ`AkdQ<q
zCJ9MNMsiY+l2oK74QWY7dNPoaOk^etS;<Cra*&f;<R%Y!$wz()P>@0trU*qTMsZ3|
zl2VkW3}q=tc`8tmN>ru_RjEdGYEY9})TRz~sYiVp(2zznrU^}HMsr%wl2){)4Q**h
zdpgjOPIRUVUFk-5deD<z^rjDe=|_JCFpxnEW(Y$W#&AY3l2MFi3}YF`cqTBBNla!6
zQ<=teW-yak%w`UAna6w<u#iP8W(iAK#&TA$l2xo`4QpA)dN#0;O>AZhTiM2TcCeFO
z>}C&p*~fkkaF9bB<_JeQ#&J$?l2e@K3}-pVc`k5~OI+p(SGmS@Zg7)Z+~y8<xyOAT
z@Q_D5<_S-E#&cfql2^Ru4R3kJdp_`yPkiPJU-`y&e(;lD{N@jT`4`;t9{~tRAOaJF
zpadg0AqYt*LKB9tgd;o=h)5(N6NRWmBRVmNNi1R$hq%NeJ_$%jA`+8?q$DFbDM(2w
zQj><Xq$52U$VetKlZC8gBRe_BNiK4ehrHw?KLsdAAqrE3q7<VzB`8TLN>hfil%qTq
zs7NI$Q-!KjqdGOHNiAwqhq~0GJ`HF{BO23$rZl5DEoezATGNKMw4*&8=tw6z(}k{d
zqdPt5NiTZShraZqKLZ%ZAO<spp$ua<BN)jjMl*)7jAJ|#n8+k1Gli*4V>&aK$t-3w
zhq=sSJ_}gLA{MiRr7UAPD_F@YR<nk+tYbYJ*vKX}vxTi}V>>(8$u4%YhrR4$KL<F-
zAr5ndqa5QnCpgI|PIHE{oZ~zfxX2|gbA_v1<2pCE$t`Ykhr8V4J`Z@vBOddFr#$01
zFL=o-Uh{^xyyHC|_{b+d^M$W`<2yh2$uEBMhrj#_?fQ=Z1SAlF2|`eU5u6Z&Bov_u
zLs-HQo(M!F5|N2QRH6}`7{nwNv57-m;t`(&BqR}uNkUSRk(?ByBo(PiLt4_2o(yCp
z6Pd|ER<e<u9ONVyxyeIb@{ykc6r>P^DMC?-QJfN#q!gtoLs`mEo(fc?5|yb!RjN^)
z8q}l~wW&j0>QSEtG^7!YX+l$)(VP~vq!q1cLtEO>o(^=R6P@WoSGv)i9`vLaz3D?=
z`q7^O3}g_48NyJ8F`N;MWE7(r!&t^Ko(W835|f$2RHiYV8O&rBvzfzO<}sfIEMyUj
zS;A75v78mGWEHDf!&=s{o(*hd6Pww>R<^O79qeQmyV=8D_OYJ>9OMv(Il@tnahwyJ
z<P@hl!&%O8o(o*$5|_EcRjzTJ8{Fg;x4FYz?s1<7Jme9NdBRhk@thaD<Q1=Z!&~0*
zo)3KF6QB9QSHAI`AN=GOzxl&o{ss2^M*sp6h`<CPD8UF$2tpEy(1al@;RsIzA`*$n
zL?J5Ch)xV*5{uZxAujQVPXZE>h{PlzDalAq3R04a)TALT=}1ooGLnhRWFafr$W9J&
zl8fBrAusvJPXP*2h{6=1D8(pF2})9m(v+br<tR@DDpHBcRG})>s7?)PQj6Nup)U2P
zPXij#h{iObDa~k33tG~O*0iB5?PyO2I?{>GbfGKV=uQuM(u>~op)dXD&j1E8h`|hD
zD8m@e2u3oB(Trg%;~38bCNhc1Okpb1n9dAlGK<;FVJ`ES&jJ>*h{Y^nDa%;S3Rbd;
z)vRGH>sZeQHnNG$Y+)<g*v<}ivWwm9VK4jG&jAi{h{GJ=D91R?2~Ki~)12Wf=Qz&=
zE^>*>T;VF$xXul3a*NyC;V$>M&jTLvh{rtPDbIM$3tsYy*Sz5^?|9D#KJtmreBmqK
z_|6Z0@{8a6;V=KfI{!l;=>Gx|h`<CPD8UF$2tpEy(1al@;RsIzA`*$nL?J5Ch)xV*
z5{uZxAujQVPXZE>h{PlzDalAq3R04a)TALT>5y&uzYJs~6Pd|ER<e<u9ONVyxyeIb
z@{ykc6r>P^DMC?-QJfN#q!gtoLs`mEo(fc?5|yb!RjN^)8q}l~wW&j0>QSEtG^7!Y
zX+l$)(VP~vq!q1cLtEO>o(^=R6P@WoSGv)i9`vLaz3D?=`q7^O3}g_48NyJ8F`N;M
zWE7(r!&t^Ko(W835|f$2RHiYV8O&rBvzfzO<}sfIEMyUjS;A75v78mGWEHDf!&=s{
zo(*hd6Pww>R<^O79qeQmyV=8D_OYJ>9OMv(Il@tnahwyJ<P@hl!&%O8o(o*$5|_Ec
zRjzTJ8{Fg;x4FYz?s1<7Jme9NdBRhk@thaD<Q1=Z!&~0*o)3KF6QB9QSHAI`AN=GO
zzxl&o{sr~^M*sp6h`<CPD8UF$2tpEy(1al@;RsIzA`*$nL?J5Ch)xV*5{uZxAujQV
zPXZE>h{PlzDalAq3R04a)TALT=}1ooGLnhRWFafr$W9J&l8fBrAusvJPXP*2h{6=1
zD8(pF2})9m(v+br<tR@DDpHBcRG})>s7?)PQj6Nup)U2PPXij#h{iObDa~k33tG~O
z*0iB5?PyO2I?{>GbfGKV=uQuM(u>~op)dXD&j1E8h`|hDD8m@e2u3oB(Trg%;~38b
zCNhc1Okpb1n9dAlGK<;FVJ`ES&jJ>*h{Y^nDa%;S3Rbd;)vRGH>sZeQHnNG$Y+)<g
z*v<}ivWwm9VK4jG&jAi{h{GJ=D91R?2~Ki~)12Wf=Qz&=E^>*>T;VF$xXul3a*NyC
z;V$>M&jTLvh{rtPDbIM$3tsYy*Sz5^?|9D#KJtmreBmqK_|6Z0@{8a6;V=I}y8j~p
z0SQE4f)JEo1SbR`2}Nka5SDO+Cjt?PL}a26m1smK1~G|6Y~m1?c*G|G2}wj^l8}^S
zBqs$aNkwYXkd}0$Cj%MDL}s#(m26}u2RX?_Zt{?qeB`G91t~;ficpkd6sH6wDMe|@
zP?mC(rveqJL}jW_m1<O{1~sWgZR${$deo-@4QWJUn$VPHG^YhEX+>+=(3W<zrvn}7
zL}$9tm2PyW2R-RUZ~D-ee)MMm0~y3%hA@<23}*x*8O3PEFqUzQX95$M#AK#0m1#_8
z1~Zw(Z00bRdCX@43t7Zsmavp%EN2BPS;cDBu$FbKX9FAA#Addzm2GTi2RqrtZuYR3
zeeCA|2RX!Hj&PJ?9OnclImKztaF%nN=K>eG#AU8<m1|t*1~<9IZSHWFd)(&%4|&96
zp74}sJm&>3dBtnq@RoPH=K~-4#Am+nm2Z6K2S546Z~pL?e*yjf5rBXMA}~P+N-%;G
zf{=tFG+_u!IKmTwh(sbXQHV-3q7#Fd#3D9vh)X=;lYoRIA~8uwN-~m@f|R5pHEBpo
zI?|JYjASA+S;$H@vXg_H<RUkD$V)!*Q-FdLqA*1$N->I4f|8V?G-W7DIm%Okid3R9
zRj5ies#AlS)S@<Zs7pQS(}0FFqA^WqN;8_%f|j(RHEn21JKEEMj&!0kUFb?Ty3>Q6
z^rAO?=u1EPGk}2%VlYD($}omAf{~13G-DXcIL0%9iA-WLQ<%y$rZa<?%wjfkn9Dro
zvw(#xVlhit$}*O-f|aadHEUSQI@Ys+jcj5wTiD7rwzGqs>|!^2*vmflbAW>!;xI=z
z$}x^}f|H!$G-o)=InHx|i(KL|SGdYGu5*K%+~PKOxXV56^MHpu;xSKn$}^txf|tDF
zHE(#!JKpnwk9^`YU--&5zVm~h{Ngu%_{+cL9RCr3fCM5iK?q7Pf)j#}gd#Ly2unD^
z6M=|CA~I2kN;IMqgP6o3HgSkcJmQmpgd`#{Nk~dEl9Pgzq#`wGNJ~1>lYxw6A~RXY
zN;a~SgPi0dH+jfQKJrt5f)t`KMJP%!ic^A;l%h0cC`&oYQ-O+9qB2#eN;RregPPQ$
zHg%{=J?hhdhBTrvO=wCpn$v=ow4ya_XiGcV(}9k3qBC9SN;kUGgP!!FH+|?!Kl(F(
zfed0WLm0|1hBJbZjAArn7|S@uGl7XrVlq>h$~2}kgPF`?HglNEJm#~2g)Cw*OIXS>
zma~GDtYS55Sj#%rvw@9lVl!LV$~LyMgPrVRH+$I2KK65fgB;>8M>xtcj&p*OoZ>WR
zILkTCbAgLo;xbpb$~CTYgPYvqHg~woJ?`^>hdkmjPk72Rp7Vm2yy7))c*{H9^MQ|i
z;xk|P$~V6AgP;83H-Grczr{TN5rBXMA}~P+N-%;Gf{=tFG+_u!IKmTwh(sbXQHV-3
zq7#Fd#3D9vh)X=;lYoRIA~8uwN-~m@f|R5pHEBpoI?|JYjASA+S;$H@vXg_H<RUkD
z$V)!*Q-FdLqA*1$N->I4f|8V?G-W7DIm%Okid3R9Rj5ies#AlS)S@<Zs7pQS(}0FF
zqA^WqN;8_%f|j(RHEn21JKEEMj&!0kUFb?Ty3>Q6^rAO?=u1EPGk}2%VlYD($}omA
zf{~13G-DXcIL0%9iA-WLQ<%y$rZa<?%wjfkn9Drovw(#xVlhit$}*O-f|aadHEUSQ
zI@Ys+jcj5wTiD7rwzGqs>|!^2*vmflbAW>!;xI=z$}x^}f|H!$G-o)=InHx|i(KL|
zSGdYGu5*K%+~PKOxXV56^MHpu;xSKn$}^txf|tDFHE(#!JKpnwk9^`YU--&5zVm~h
z{Ngu%_{+bgT>lY(fCM5iK?q7Pf)j#}gd#Ly2unD^6M=|CA~I2kN;IMqgP6o3HgSkc
zJmQmpgd`#{Nk~dEl9Pgzq#`wGNJ~1>lYxw6A~RXYN;a~SgPi0dH+jfQKJrt5f)t`K
zMJP%!ic^A;l%h0cC`&oYQ-O+9qB2#eN;RregPPQ$Hg%{=J?hhdhBTrvO=wCpn$v=o
zw4ya_XiGcV(}9k3qBC9SN;kUGgP!!FH+|?!Kl(F(fed0WLm0|1hBJbZjAArn7|S@u
zGl7XrVlq>h$~2}kgPF`?HglNEJm#~2g)Cw*OIXS>ma~GDtYS55Sj#%rvw@9lVl!LV
z$~LyMgPrVRH+$I2KK65fgB;>8M>xtcj&p*OoZ>WRILkTCbAgLo;xbpb$~CTYgPYvq
zHg~woJ?`^>hdkmjPk72Rp7Vm2yy7))c*{H9^MQ|i;xk|P$~V6AgP;83H-GrczlD7N
z5rBXMA}~P+N-%;Gf{=tFG+_u!IKmTwh(sbXQHV-3q7#Fd#3D9vh)X=;lYoRIA~8uw
zN-~m@f|R5pHEBpoI?|JYjASA+S;$H@vXg_H<RUkD$V)!*Q-FdLqA*1$N->I4f|8V?
zG-W7DIm%Okid3R9Rj5ies#AlS)S@<Zs7pQS(}0FFqA^WqN;8_%f|j(RHEn21JKEEM
zj&!0kUFb?Ty3>Q6^rAO?=u1EPGk}2%VlYD($}omAf{~13G-DXcIL0%9iA-WLQ<%y$
zrZa<?%wjfkn9Drovw(#xVlhit$}*O-f|aadHEUSQI@Ys+jcj5wTiD7rwzGqs>|!^2
z*vmflbAW>!;xI=z$}x^}f|H!$G-o)=InHx|i(KL|SGdYGu5*K%+~PKOxXV56^MHpu
z;xSKn$}^txf|tDFHE(#!JKpnwk9^`YU--&5zVm~h{Ngu%_{+a#oc|GkfCM5iK?q7P
zf)j#}gd#Ly2unD^6M=|CA~I2kN;IMqgP6o3HgSkcJmQmpgd`#{Nk~dEl9Pgzq#`wG
zNJ~1>lYxw6A~RXYN;a~SgPi0dH+jfQKJrt5f)t`KMJP%!ic^A;l%h0cC`&oYQ-O+9
zqB2#eN;RregPPQ$Hg%{=J?hhdhBTrvO=wCpn$v=ow4ya_XiGcV(}9k3qBC9SN;kUG
zgP!!FH+|?!Kl(F(fed0WLm0|1hBJbZjAArn7|S@uGl7XrVlq>h$~2}kgPF`?HglNE
zJm#~2g)Cw*OIXS>ma~GDtYS55Sj#%rvw@9lVl!LV$~LyMgPrVRH+$I2KK65fgB;>8
zM>xtcj&p*OoZ>WRILkTCbAgLo;xbpb$~CTYgPYvqHg~woJ?`^>hdkmjPk72Rp7Vm2
zyy7))c*{H9^MQ|i;xk|P$~V6AgP;83H-GrczeT+N5rBXMA}~P+N-%;Gf{=tFG+_u!
zIKmTwh(sbXQHV-3q7#Fd#3D9vh)X=;lYoRIA~8uwN-~m@f|R5pHEBpoI?|JYjASA+
zS;$H@vXg_H<RUkD$V)!*Q-FdLqA*1$N->I4f|8V?G-W7DIm%Okid3R9Rj5ies#AlS
z)S@<Zs7pQS(}0FFqA^WqN;8_%f|j(RHEn21JKEEMj&!0kUFb?Ty3>Q6^rAO?=u1EP
zGk}2%VlYD($}omAf{~13G-DXcIL0%9iA-WLQ<%y$rZa<?%wjfkn9Drovw(#xVlhit
z$}*O-f|aadHEUSQI@Ys+jcj5wTiD7rwzGqs>|!^2*vmflbAW>!;xI=z$}x^}f|H!$
zG-o)=InHx|i(KL|SGdYGu5*K%+~PKOxXV56^MHpu;xSKn$}^txf|tDFHE(#!JKpnw
zk9^`YU--&5zVm~h{Ngu%_{+Z~-2V}PfCM5iK?q7Pf)j#}gd#Ly2unD^6M=|CA~I2k
zN;IMqgP6o3HgSkcJmQmpgd`#{Nk~dEl9Pgzq#`wGNJ~1>lYxw6A~RXYN;a~SgPi0d
zH+jfQKJrt5f)t`KMJP%!ic^A;l%h0cC`&oYQ-O+9qB2#eN;RregPPQ$Hg%{=J?hhd
zhBTrvO=wCpn$v=ow4ya_XiGcV(}9k3qBC9SN;kUGgP!!FH+|?!Kl(F(fed0WLm0|1
zhBJbZjAArn7|S@uGl7XrVlq>h$~2}kgPF`?HglNEJm#~2g)Cw*OIXS>ma~GDtYS55
zSj#%rvw@9lVl!LV$~LyMgPrVRH+$I2KK65fgB;>8M>xtcj&p*OoZ>WRILkTCbAgLo
z;xbpb$~CTYgPYvqHg~woJ?`^>hdkmjPk72Rp7Vm2yy7))c*{H9^MQ|i;xk|P$~V6A
zgP;83H-Grc|Kh2C0ub>37^a78F%SR%V11q1wtj8fwr$(CZQHhO+qP|+lSPgYf)JEo
z1SbR`2}Nka5SDO+Cjt?PL}a26m1smK1~G|6Y~m1?c*G|G0VE_5iAh3Il98Mgq$Cxo
zNkdxFk)8}>Bomp*LRPYoogCyO7rDtpUh<Kj0u-bWg(*T&icy>rl%y1;DMMMxQJxA^
zq!N{>LRG3!of_1n7PYBEUFuPv1~jA*jcGztn$esVw4@cSX+vAu(Vh-;q!XR#LRY%c
zogVb07rp62U;5FX0SsgigBik5hB2HGjARs}8N*n{F`fxbWD=8^!c?X)of*tz7PFbd
zT;?&K1uSF{i&?@_ma&`_tYj6dS;Jb^v7QZVWD}d&!dAAiogM6C7rWWRUiPt{103WK
zhdIJgj&Yn5oa7XzIm21bah?lY<Pw*;!d0$uog3Wb7Pq;>UG8z82R!5vk9opVp7ER)
zyyO+HdBa=Y@tzNS<P)Fy!dJfWoge(<7r*(#U;Ys&z<&bsA3+F8FoF|;kc1*MVF*h&
z!V`grL?SX#h)Oh~6N8w<A~tb|OFZI}fB+Jbh{PlzDalAq3R04a)TALT=}1ooGLnhR
zWFafr$W9J&l8fBrAusvJPXP*2h{6=1D8(pF2})9m(v+br<tR@DDpHBcRG})>s7?)P
zQj6Nup)U2PPXij#h{iObDa~k33tG~O*0iB5?PyO2I?{>GbfGKV=uQuM(u>~op)dXD
z&j1E8h`|hDD8m@e2u3oB(Trg%;~38bCNhc1Okpb1n9dAlGK<;FVJ`ES&jJ>*h{Y^n
zDa%;S3Rbd;)vRGH>sZeQHnNG$Y+)<g*v<}ivWwm9VK4jG&jAi{h{GJ=D91R?2~Ki~
z)12Wf=Qz&=E^>*>T;VF$xXul3a*NyC;V$>M&jTLvh{rtPDbIM$3tsYy*Sz5^?|9D#
zKJtmreBmqK_|6Z0@{8a6;V=IPl+b?y^B+M7N-%;Gf{=tFG+_u!IKmTwh(sbXQHV-3
zq7#Fd#3D9vh)X=;lYjscl8D44At}j7P6|?ziqxbbE$K*41~QU~%w!=e*~m@~a*~VO
z<RLHl$WH+ZQi#G7p(w>DP6<j<iqe#!EafOq1u9aB%2c5$)u>JlYEp~Z)S)i*s80hL
z(ul@1p()L1P77Mniq^EDE$wJe2RhP;&UB$G-RMpadeV#D^r0{P=+6KKGKj$pVJO2G
z&Im>_iqVW=EaMo@1ST?x$xLA?)0oZ-W-^P}%waC`n9l+hvWUejVJXX4&I(qtiq))P
zE$dj%1~#&Z&1_*S+t|(ycCw4z>|rna*v|nDa)`qm;V8#A&IwL(iqo9oEay1S1uk-l
z%Ut0q*SO9NZgPv;+~F?wxX%L~@`%Sg;VI8}&I?}hir2j1E$?{G2R`zN&wSx4-}ufC
ze)5ao{NXSE2$aZw0`nh12ud)56M~S0A~azLOE|(4frvyRGEs<1G@=uOn8YGBafnMi
z;*)>?5|W6-Bq1ruNKOh;l8V%%AuZ`hPX;oQiOggnE7{0S4sw!<+~grI`N&TJ3Q~x|
z6rm`^C{77VQi{@)p)BPnPX#JciON)=D%Ge?4Qf)0+SH*g^{7t+8q$cyG@&WYXif`S
z(u&r!p)KubPX{{EiOzJPE8XZ$4|>vz-t?g_{pimC1~Q1j3}Gn47|sYrGK$fRVJzbq
z&jcniiOEc1D$|(G3}!Nm+00=s^O(;97P5%NEMY0jSk4MovWnHLVJ+)e&jvQKiOp<b
zE8E!44tBDO-Rxm6``FI`4swXY9N{R(IL--9a*ETO;VkDk&jl`WiOXE!D%ZHq4Q_Ia
z+uY$U_qfjk9`cCCJmD$Nc+Lx6@`~5I;VtiY&j&v8iO+oDE8qCe4}S8C-~8b({|J=W
ze**I#K?q7Pf)j#}gd#Ly2unD^6M=|CA~I2kN;IMqgP6o3HgSkcJmQmp01}dj#3Ugp
z$w*ELQj&_)q#-ToNKXbbl8MY@AuHL)P7ZRCi`?WPFZsw%0SZ!x!W5w>#VAe*N>Yl_
zl%Xu;C{G0{Qi;k`p(@p=P7P{Oi`vwoF7>ES0~*qZ#x$WR&1g;wTGEQvw4p8SXio<^
z(uvM=p)1|!P7iw0i{A91Fa7Ax00uIM!3<$2!x+v8Mly=gjA1O}7|#SIGKtAdVJg#@
z&J1QUi`mR!F7uer0v57}#Vlbd%UI3|R<erKtYIzdSkDGFvWd-XVJq9%&JK36i{0#D
zFZ<Zf0S<DA!yMr#$2iUjPI8LVoZ&3zIL`$xa*4}a;VRd-&JAvIi`(4cF88?410M2-
z$2{RF&v?!YUh<09yx}eHc+Uqu@`=xU;Va+x&JTX_i{Jd=FaHRX#D4<wA3+F8FoF|;
zkc1*MVF*h&!V`grL?SX#h)Oh~6N8w<A~tb|OFZI}fB+Jbh{PlzDalAq3R04a)TALT
z=}1ooGLnhRWFafr$W9J&l8fBrAusvJPXP*2h{6=1D8(pF2})9m(v+br<tR@DDpHBc
zRG})>s7?)PQj6Nup)U2PPXij#h{iObDa~k33tG~O*0iB5?PyO2I?{>GbfGKV=uQuM
z(u>~op)dXD&j1E8h`|hDD8m@e2u3oB(Trg%;~38bCNhc1Okpb1n9dAlGK<;FVJ`ES
z&jJ>*h{Y^nDa%;S3Rbd;)vRGH>sZeQHnNG$Y+)<g*v<}ivWwm9VK4jG&jAi{h{GJ=
zD91R?2~Ki~)12Wf=Qz&=E^>*>T;VF$xXul3a*NyC;V$>M&jTLvh{rtPDbIM$3tsYy
z*Sz5^?|9D#KJtmreBmqK_|6Z0@{8a6;V=IPl+=F$^B+M7N-%;Gf{=tFG+_u!IKmTw
zh(sbXQHV-3q7#Fd#3D9vh)X=;lYjscl8D44At}j7P6|?ziqxbbE$K*41~QU~%w!=e
z*~m@~a*~VO<RLHl$WH+ZQi#G7p(w>DP6<j<iqe#!EafOq1u9aB%2c5$)u>JlYEp~Z
z)S)i*s80hL(ul@1p()L1P77Mniq^EDE$wJe2RhP;&UB$G-RMpadeV#D^r0{P=+6KK
zGKj$pVJO2G&Im>_iqVW=EaMo@1ST?x$xLA?)0oZ-W-^P}%waC`n9l+hvWUejVJXX4
z&I(qtiq))PE$dj%1~#&Z&1_*S+t|(ycCw4z>|rna*v|nDa)`qm;V8#A&IwL(iqo9o
zEay1S1uk-l%Ut0q*SO9NZgPv;+~F?wxX%L~@`%Sg;VI8}&I?}hir2j1E$?{G2R`zN
z&wSx4-}ufCe)5ao{NXSE2$al!0`nh12ud)56M~S0A~azLOE|(4frvyRGEs<1G@=uO
zn8YGBafnMi;*)>?5|W6-Bq1ruNKOh;l8V%%AuZ`hPX;oQiOggnE7{0S4sw!<+~grI
z`N&TJ3Q~x|6rm`^C{77VQi{@)p)BPnPX#JciON)=D%Ge?4Qf)0+SH*g^{7t+8q$cy
zG@&WYXif`S(u&r!p)KubPX{{EiOzJPE8XZ$4|>vz-t?g_{pimC1~Q1j3}Gn47|sYr
zGK$fRVJzbq&jcniiOEc1D$|(G3}!Nm+00=s^O(;97P5%NEMY0jSk4MovWnHLVJ+)e
z&jvQKiOp<bE8E!44tBDO-Rxm6``FI`4swXY9N{R(IL--9a*ETO;VkDk&jl`WiOXE!
zD%ZHq4Q_Ia+uY$U_qfjk9`cCCJmD$Nc+Lx6@`~5I;VtiY&j&v8iO+oDE8qCe4}S8C
z-~8b({|J=ae**I#K?q7Pf)j#}gd#Ly2unD^6M=|CA~I2kN;IMqgP6o3HgSkcJmQmp
z01}dj#3Ugp$w*ELQj&_)q#-ToNKXbbl8MY@AuHL)P7ZRCi`?WPFZsw%0SZ!x!W5w>
z#VAe*N>Yl_l%Xu;C{G0{Qi;k`p(@p=P7P{Oi`vwoF7>ES0~*qZ#x$WR&1g;wTGEQv
zw4p8SXio<^(uvM=p)1|!P7iw0i{A91Fa7Ax00uIM!3<$2!x+v8Mly=gjA1O}7|#SI
zGKtAdVJg#@&J1QUi`mR!F7uer0v57}#Vlbd%UI3|R<erKtYIzdSkDGFvWd-XVJq9%
z&JK36i{0#DFZ<Zf0S<DA!yMr#$2iUjPI8LVoZ&3zIL`$xa*4}a;VRd-&JAvIi`(4c
zF88?410M2-$2{RF&v?!YUh<09yx}eHc+Uqu@`=xU;Va+x&JTX_i{Jd=FaHRX!hZtu
zA3+F8FoF|;kc1*MVF*h&!V`grL?SX#h)Oh~6N8w<A~tb|OFZI}fB+Jbh{PlzDalAq
z3R04a)TALT=}1ooGLnhRWFafr$W9J&l8fBrAusvJPXP*2h{6=1D8(pF2})9m(v+br
z<tR@DDpHBcRG})>s7?)PQj6Nup)U2PPXij#h{iObDa~k33tG~O*0iB5?PyO2I?{>G
zbfGKV=uQuM(u>~op)dXD&j1E8h`|hDD8m@e2u3oB(Trg%;~38bCNhc1Okpb1n9dAl
zGK<;FVJ`ES&jJ>*h{Y^nDa%;S3Rbd;)vRGH>sZeQHnNG$Y+)<g*v<}ivWwm9VK4jG
z&jAi{h{GJ=D91R?2~Ki~)12Wf=Qz&=E^>*>T;VF$xXul3a*NyC;V$>M&jTLvh{rtP
zDbIM$3tsYy*Sz5^?|9D#KJtmreBmqK_|6Z0@{8a6;V=IPl+u3!^B+M7N-%;Gf{=tF
zG+_u!IKmTwh(sbXQHV-3q7#Fd#3D9vh)X=;lYjscl8D44At}j7P6|?ziqxbbE$K*4
z1~QU~%w!=e*~m@~a*~VO<RLHl$WH+ZQi#G7p(w>DP6<j<iqe#!EafOq1u9aB%2c5$
z)u>JlYEp~Z)S)i*s80hL(ul@1p()L1P77Mniq^EDE$wJe2RhP;&UB$G-RMpadeV#D
z^r0{P=+6KKGKj$pVJO2G&Im>_iqVW=EaMo@1ST?x$xLA?)0oZ-W-^P}%waC`n9l+h
zvWUejVJXX4&I(qtiq))PE$dj%1~#&Z&1_*S+t|(ycCw4z>|rna*v|nDa)`qm;V8#A
z&IwL(iqo9oEay1S1uk-l%Ut0q*SO9NZgPv;+~F?wxX%L~@`%Sg;VI8}&I?}hir2j1
zE$?{G2R`zN&wSx4-}ufCe)5ao{NXSE2$afy0`nh12ud)56M~S0A~azLOE|(4frvyR
zGEs<1G@=uOn8YGBafnMi;*)>?5|W6-Bq1ruNKOh;l8V%%AuZ`hPX;oQiOggnE7{0S
z4sw!<+~grI`N&TJ3Q~x|6rm`^C{77VQi{@)p)BPnPX#JciON)=D%Ge?4Qf)0+SH*g
z^{7t+8q$cyG@&WYXif`S(u&r!p)KubPX{{EiOzJPE8XZ$4|>vz-t?g_{pimC1~Q1j
z3}Gn47|sYrGK$fRVJzbq&jcniiOEc1D$|(G3}!Nm+00=s^O(;97P5%NEMY0jSk4Mo
zvWnHLVJ+)e&jvQKiOp<bE8E!44tBDO-Rxm6``FI`4swXY9N{R(IL--9a*ETO;VkDk
z&jl`WiOXE!D%ZHq4Q_Ia+uY$U_qfjk9`cCCJmD$Nc+Lx6@`~5I;VtiY&j&v8iO+oD
zE8qCe4}S8C-~8b({|J=Ye**I#K?q7Pf)j#}gd#Ly2unD^6M=|CA~I2kN;IMqgP6o3
zHgSkcJmQmp01}dj#3Ugp$w*ELQj&_)q#-ToNKXbbl8MY@AuHL)P7ZRCi`?WPFZsw%
z0SZ!x!W5w>#VAe*N>Yl_l%Xu;C{G0{Qi;k`p(@p=P7P{Oi`vwoF7>ES0~*qZ#x$WR
z&1g;wTGEQvw4p8SXio<^(uvM=p)1|!P7iw0i{A91Fa7Ax00uIM!3<$2!x+v8Mly=g
zjA1O}7|#SIGKtAdVJg#@&J1QUi`mR!F7uer0v57}#Vlbd%UI3|R<erKtYIzdSkDGF
zvWd-XVJq9%&JK36i{0#DFZ<Zf0S<DA!yMr#$2iUjPI8LVoZ&3zIL`$xa*4}a;VRd-
z&JAvIi`(4cF88?410M2-$2{RF&v?!YUh<09yx}eHc+Uqu@`=xU;Va+x&JTX_i{Jd=
zFaHRX#(x6yA3+F8FoF|;kc1*MVF*h&!V`grL?SX#h)Oh~6N8w<A~tb|OFZI}fB+Jb
zh{PlzDalAq3R04a)TALT=}1ooGLnhRWFafr$W9J&l8fBrAusvJPXP*2h{6=1D8(pF
z2})9m(v+br<tR@DDpHBcRG})>s7?)PQj6Nup)U2PPXij#h{iObDa~k33tG~O*0iB5
z?PyO2I?{>GbfGKV=uQuM(u>~op)dXD&j1E8h`|hDD8m@e2u3oB(Trg%;~38bCNhc1
zOkpb1n9dAlGK<;FVJ`ES&jJ>*h{Y^nDa%;S3Rbd;)vRGH>sZeQHnNG$Y+)<g*v<}i
zvWwm9VK4jG&jAi{h{GJ=D91R?2~Ki~)12Wf=Qz&=E^>*>T;VF$xXul3a*NyC;V$>M
z&jTLvh{rtPDbIM$3tsYy*Sz5^?|9D#KJtmreBmqK_|6Z0@{8a6;V=IPl-7R&^B+M7
zN-%;Gf{=tFG+_u!IKmTwh(sbXQHV-3q7#Fd#3D9vh)X=;lYjscl8D44At}j7P6|?z
ziqxbbE$K*41~QU~%w!=e*~m@~a*~VO<RLHl$WH+ZQi#G7p(w>DP6<j<iqe#!EafOq
z1u9aB%2c5$)u>JlYEp~Z)S)i*s80hL(ul@1p()L1P77Mniq^EDE$wJe2RhP;&UB$G
z-RMpadeV#D^r0{P=+6KKGKj$pVJO2G&Im>_iqVW=EaMo@1ST?x$xLA?)0oZ-W-^P}
z%waC`n9l+hvWUejVJXX4&I(qtiq))PE$dj%1~#&Z&1_*S+t|(ycCw4z>|rna*v|nD
za)`qm;V8#A&IwL(iqo9oEay1S1uk-l%Ut0q*SO9NZgPv;+~F?wxX%L~@`%Sg;VI8}
z&I?}hir2j1E$?{G2R`zN&wSx4-}ufCe)5ao{NXSE2$ar$0`nh12ud)56M~S0A~azL
zOE|(4frvyRGEs<1G@=uOn8YGBafnMi;*)>?5|W6-Bq1ruNKOh;l8V%%AuZ`hPX;oQ
ziOggnE7{0S4sw!<+~grI`N&TJ3Q~x|6rm`^C{77VQi{@)p)BPnPX#JciON)=D%Ge?
z4Qf)0+SH*g^{7t+8q$cyG@&WYXif`S(u&r!p)KubPX{{EiOzJPE8XZ$4|>vz-t?g_
z{pimC1~Q1j3}Gn47|sYrGK$fRVJzbq&jcniiOEc1D$|(G3}!Nm+00=s^O(;97P5%N
zEMY0jSk4MovWnHLVJ+)e&jvQKiOp<bE8E!44tBDO-Rxm6``FI`4swXY9N{R(IL--9
za*ETO;VkDk&jl`WiOXE!D%ZHq4Q_Ia+uY$U_qfjk9`cCCJmD$Nc+Lx6@`~5I;VtiY
z&j&v8iO+oDE8qCe4}S8C-~8b({|J=ce**I#K?q7Pf)j#}gd#Ly2unD^6M=|CA~I2k
zN;IMqgP6o3HgSkcJmQmp01}dj#3Ugp$w*ELQj&_)q#-ToNKXbbl8MY@AuHL)P7ZRC
zi`?WPFZsw%0SZ!x!W5w>#VAe*N>Yl_l%Xu;C{G0{Qi;k`p(@p=P7P{Oi`vwoF7>ES
z0~*qZ#x$WR&1g;wTGEQvw4p8SXio<^(uvM=p)1|!P7iw0i{A91Fa7Ax00uIM!3<$2
z!x+v8Mly=gjA1O}7|#SIGKtAdVJg#@&J1QUi`mR!F7uer0v57}#Vlbd%UI3|R<erK
ztYIzdSkDGFvWd-XVJq9%&JK36i{0#DFZ<Zf0S<DA!yMr#$2iUjPI8LVoZ&3zIL`$x
za*4}a;VRd-&JAvIi`(4cF88?410M2-$2{RF&v?!YUh<09yx}eHc+Uqu@`=xU;Va+x
z&JTX_i{Jd=FaHRX!G8ktA3+F8FoF|;kc1*MVF*h&!V`grL?SX#h)Oh~6N8w<A~tb|
zOFZI}fB+Jbh{PlzDalAq3R04a)TALT=}1ooGLnhRWFafr$W9J&l8fBrAusvJPXP*2
zh{6=1D8(pF2})9m(v+br<tR@DDpHBcRG})>s7?)PQj6Nup)U2PPXij#h{iObDa~k3
z3tG~O*0iB5?PyO2I?{>GbfGKV=uQuM(u>~op)dXD&j1E8h`|hDD8m@e2u3oB(Trg%
z;~38bCNhc1Okpb1n9dAlGK<;FVJ`ES&jJ>*h{Y^nDa%;S3Rbd;)vRGH>sZeQHnNG$
zY+)<g*v<}ivWwm9VK4jG&jAi{h{GJ=D91R?2~Ki~)12Wf=Qz&=E^>*>T;VF$xXul3
za*NyC;V$>M&jTLvh{rtPDbIM$3tsYy*Sz5^?|9D#KJtmreBmqK_|6Z0@{8a6;V=IP
zl+k|z^B+M7N-%;Gf{=tFG+_u!IKmTwh(sbXQHV-3q7#Fd#3D9vh)X=;lYjscl8D44
zAt}j7P6|?ziqxbbE$K*41~QU~%w!=e*~m@~a*~VO<RLHl$WH+ZQi#G7p(w>DP6<j<
ziqe#!EafOq1u9aB%2c5$)u>JlYEp~Z)S)i*s80hL(ul@1p()L1P77Mniq^EDE$wJe
z2RhP;&UB$G-RMpadeV#D^r0{P=+6KKGKj$pVJO2G&Im>_iqVW=EaMo@1ST?x$xLA?
z)0oZ-W-^P}%waC`n9l+hvWUejVJXX4&I(qtiq))PE$dj%1~#&Z&1_*S+t|(ycCw4z
z>|rna*v|nDa)`qm;V8#A&IwL(iqo9oEay1S1uk-l%Ut0q*SO9NZgPv;+~F?wxX%L~
z@`%Sg;VI8}&I?}hir2j1E$?{G2R`zN&wSx4-}ufCe)5ao{NXSE2$acx0`nh12ud)5
z6M~S0A~azLOE|(4frvyRGEs<1G@=uOn8YGBafnMi;*)>?5|W6-Bq1ruNKOh;l8V%%
zAuZ`hPX;oQiOggnE7{0S4sw!<+~grI`N&TJ3Q~x|6rm`^C{77VQi{@)p)BPnPX#Jc
ziON)=D%Ge?4Qf)0+SH*g^{7t+8q$cyG@&WYXif`S(u&r!p)KubPX{{EiOzJPE8XZ$
z4|>vz-t?g_{pimC1~Q1j3}Gn47|sYrGK$fRVJzbq&jcniiOEc1D$|(G3}!Nm+00=s
z^O(;97P5%NEMY0jSk4MovWnHLVJ+)e&jvQKiOp<bE8E!44tBDO-Rxm6``FI`4swXY
z9N{R(IL--9a*ETO;VkDk&jl`WiOXE!D%ZHq4Q_Ia+uY$U_qfjk9`cCCJmD$Nc+Lx6
z@`~5I;VtiY&j&v8iO+oDE8qCe4}S8C-~8b({|J=Xe**I#K?q7Pf)j#}gd#Ly2unD^
z6M=|CA~I2kN;IMqgP6o3HgSkcJmQmp01}dj#3Ugp$w*ELQj&_)q#-ToNKXbbl8MY@
zAuHL)P7ZRCi`?WPFZsw%0SZ!x!W5w>#VAe*N>Yl_l%Xu;C{G0{Qi;k`p(@p=P7P{O
zi`vwoF7>ES0~*qZ#x$WR&1g;wTGEQvw4p8SXio<^(uvM=p)1|!P7iw0i{A91Fa7Ax
z00uIM!3<$2!x+v8Mly=gjA1O}7|#SIGKtAdVJg#@&J1QUi`mR!F7uer0v57}#Vlbd
z%UI3|R<erKtYIzdSkDGFvWd-XVJq9%&JK36i{0#DFZ<Zf0S<DA!yMr#$2iUjPI8LV
zoZ&3zIL`$xa*4}a;VRd-&JAvIi`(4cF88?410M2-$2{RF&v?!YUh<09yx}eHc+Uqu
z@`=xU;Va+x&JTX_i{Jd=FaHRX#eV|xA3+F8FoF|;kc1*MVF*h&!V`grL?SX#h)Oh~
z6N8w<A~tb|OFZI}fB+Jbh{PlzDalAq3R04a)TALT=}1ooGLnhRWFafr$W9J&l8fBr
zAusvJPXP*2h{6=1D8(pF2})9m(v+br<tR@DDpHBcRG})>s7?)PQj6Nup)U2PPXij#
zh{iObDa~k33tG~O*0iB5?PyO2I?{>GbfGKV=uQuM(u>~op)dXD&j1E8h`|hDD8m@e
z2u3oB(Trg%;~38bCNhc1Okpb1n9dAlGK<;FVJ`ES&jJ>*h{Y^nDa%;S3Rbd;)vRGH
z>sZeQHnNG$Y+)<g*v<}ivWwm9VK4jG&jAi{h{GJ=D91R?2~Ki~)12Wf=Qz&=E^>*>
zT;VF$xXul3a*NyC;V$>M&jTLvh{rtPDbIM$3tsYy*Sz5^?|9D#KJtmreBmqK_|6Z0
z@{8a6;V=IPl+}L%^B+M7N-%;Gf{=tFG+_u!IKmTwh(sbXQHV-3q7#Fd#3D9vh)X=;
zlYjscl8D44At}j7P6|?ziqxbbE$K*41~QU~%w!=e*~m@~a*~VO<RLHl$WH+ZQi#G7
zp(w>DP6<j<iqe#!EafOq1u9aB%2c5$)u>JlYEp~Z)S)i*s80hL(ul@1p()L1P77Mn
ziq^EDE$wJe2RhP;&UB$G-RMpadeV#D^r0{P=+6KKGKj$pVJO2G&Im>_iqVW=EaMo@
z1ST?x$xLA?)0oZ-W-^P}%waC`n9l+hvWUejVJXX4&I(qtiq))PE$dj%1~#&Z&1_*S
z+t|(ycCw4z>|rna*v|nDa)`qm;V8#A&IwL(iqo9oEay1S1uk-l%Ut0q*SO9NZgPv;
z+~F?wxX%L~@`%Sg;VI8}&I?}hir2j1E$?{G2R`zN&wSx4-}ufCe)5ao{NXSE2$ao#
z0`nh12ud)56M~S0A~azLOE|(4frvyRGEs<1G@=uOn8YGBafnMi;*)>?5|W6-Bq1ru
zNKOh;l8V%%AuZ`hPX;oQiOggnE7{0S4sw!<+~grI`N&TJ3Q~x|6rm`^C{77VQi{@)
zp)BPnPX#JciON)=D%Ge?4Qf)0+SH*g^{7t+8q$cyG@&WYXif`S(u&r!p)KubPX{{E
ziOzJPE8XZ$4|>vz-t?g_{pimC1~Q1j3}Gn47|sYrGK$fRVJzbq&jcniiOEc1D$|(G
z3}!Nm+00=s^O(;97P5%NEMY0jSk4MovWnHLVJ+)e&jvQKiOp<bE8E!44tBDO-Rxm6
z``FI`4swXY9N{R(IL--9a*ETO;VkDk&jl`WiOXE!D%ZHq4Q_Ia+uY$U_qfjk9`cCC
zJmD$Nc+Lx6@`~5I;VtiY&j&v8iO+oDE8qCe4}S8C-~8b({|J=be**I#K?q7Pf)j#}
zgd#Ly2unD^6M=|CA~I2kN;IMqgP6o3HgSkcJmQmp01}dj#3Ugp$w*ELQj&_)q#-To
zNKXbbl8MY@AuHL)P7ZRCi`?WPFZsw%0SZ!x!W5w>#VAe*N>Yl_l%Xu;C{G0{Qi;k`
zp(@p=P7P{Oi`vwoF7>ES0~*qZ#x$WR&1g;wTGEQvw4p8SXio<^(uvM=p)1|!P7iw0
zi{A91Fa7Ax00uIM!3<$2!x+v8Mly=gjA1O}7|#SIGKtAdVJg#@&J1QUi`mR!F7uer
z0v57}#Vlbd%UI3|R<erKtYIzdSkDGFvWd-XVJq9%&JK36i{0#DFZ<Zf0S<DA!yMr#
z$2iUjPI8LVoZ&3zIL`$xa*4}a;VRd-&JAvIi`(4cF88?410M2-$2{RF&v?!YUh<09
zyx}eHc+Uqu@`=xU;Va+x&JTX_i{Jd=FaHRX!+!$vA3+F8FoF|;kc1*MVF*h&!V`gr
zL?SX#h)Oh~6N8w<A~tb|OFZI}fB+Jbh{PlzDalAq3R04a)TALT=}1ooGLnhRWFafr
z$W9J&l8fBrAusvJPXP*2h{6=1D8(pF2})9m(v+br<tR@DDpHBcRG})>s7?)PQj6Nu
zp)U2PPXij#h{iObDa~k33tG~O*0iB5?PyO2I?{>GbfGKV=uQuM(u>~op)dXD&j1E8
zh`|hDD8m@e2u3oB(Trg%;~38bCNhc1Okpb1n9dAlGK<;FVJ`ES&jJ>*h{Y^nDa%;S
z3Rbd;)vRGH>sZeQHnNG$Y+)<g*v<}ivWwm9VK4jG&jAi{h{GJ=D91R?2~Ki~)12Wf
z=Qz&=E^>*>T;VF$xXul3a*NyC;V$>M&jTLvh{rtPDbIM$3tsYy*Sz5^?|9D#KJtmr
zeBmqK_|6Z0@{8a6;V=IPl+%9#^B+M7N-%;Gf{=tFG+_u!IKmTwh(sbXQHV-3q7#Fd
z#3D9vh)X=;lYjscl8D44At}j7P6|?ziqxbbE$K*41~QU~%w!=e*~m@~a*~VO<RLHl
z$WH+ZQi#G7p(w>DP6<j<iqe#!EafOq1u9aB%2c5$)u>JlYEp~Z)S)i*s80hL(ul@1
zp()L1P77Mniq^EDE$wJe2RhP;&UB$G-RMpadeV#D^r0{P=+6KKGKj$pVJO2G&Im>_
ziqVW=EaMo@1ST?x$xLA?)0oZ-W-^P}%waC`n9l+hvWUejVJXX4&I(qtiq))PE$dj%
z1~#&Z&1_*S+t|(ycCw4z>|rna*v|nDa)`qm;V8#A&IwL(iqo9oEay1S1uk-l%Ut0q
z*SO9NZgPv;+~F?wxX%L~@`%Sg;VI8}&I?}hir2j1E$?{G2R`zN&wSx4-}ufCe)5ao
z{NXSE2$aiz0`nh12ud)56M~S0A~azLOE|(4frvyRGEs<1G@=uOn8YGBafnMi;*)>?
z5|W6-Bq1ruNKOh;l8V%%AuZ`hPX;oQiOggnE7{0S4sw!<+~grI`N&TJ3Q~x|6rm`^
zC{77VQi{@)p)BPnPX#JciON)=D%Ge?4Qf)0+SH*g^{7t+8q$cyG@&WYXif`S(u&r!
zp)KubPX{{EiOzJPE8XZ$4|>vz-t?g_{pimC1~Q1j3}Gn47|sYrGK$fRVJzbq&jcni
ziOEc1D$|(G3}!Nm+00=s^O(;97P5%NEMY0jSk4MovWnHLVJ+)e&jvQKiOp<bE8E!4
z4tBDO-Rxm6``FI`4swXY9N{R(IL--9a*ETO;VkDk&jl`WiOXE!D%ZHq4Q_Ia+uY$U
z_qfjk9`cCCJmD$Nc+Lx6@`~5I;VtiY&j&v8iO+oDE8qCe4}S8C-~8b({|J=Ze**I#
zK?q7Pf)j#}gd#Ly2unD^6M=|CA~I2kN;IMqgP6o3HgSkcJmQmp01}dj#3Ugp$w*EL
zQj&_)q#-ToNKXbbl8MY@AuHL)P7ZRCi`?WPFZsw%0SZ!x!W5w>#VAe*N>Yl_l%Xu;
zC{G0{Qi;k`p(@p=P7P{Oi`vwoF7>ES0~*qZ#x$WR&1g;wTGEQvw4p8SXio<^(uvM=
zp)1|!P7iw0i{A91Fa7Ax00uIM!3<$2!x+v8Mly=gjA1O}7|#SIGKtAdVJg#@&J1QU
zi`mR!F7uer0v57}#Vlbd%UI3|R<erKtYIzdSkDGFvWd-XVJq9%&JK36i{0#DFZ<Zf
z0S<DA!yMr#$2iUjPI8LVoZ&3zIL`$xa*4}a;VRd-&JAvIi`(4cF88?410M2-$2{RF
z&v?!YUh<09yx}eHc+Uqu@`=xU;Va+x&JTX_i{Jd=FaHRX$A1FzA3+F8FoF|;kc1*M
zVF*h&!V`grL?SX#h)Oh~6N8w<A~tb|OFZI}fB+Jbh{PlzDalAq3R04a)TALT=}1oo
zGLnhRWFafr$W9J&l8fBrAusvJPXP*2h{6=1D8(pF2})9m(v+br<tR@DDpHBcRG})>
zs7?)PQj6Nup)U2PPXij#h{iObDa~k33tG~O*0iB5?PyO2I?{>GbfGKV=uQuM(u>~o
zp)dXD&j1E8h`|hDD8m@e2u3oB(Trg%;~38bCNhc1Okpb1n9dAlGK<;FVJ`ES&jJ>*
zh{Y^nDa%;S3Rbd;)vRGH>sZeQHnNG$Y+)<g*v<}ivWwm9VK4jG&jAi{h{GJ=D91R?
z2~Ki~)12Wf=Qz&=E^>*>T;VF$xXul3a*NyC;V$>M&jTLvh{rtPDbIM$3tsYy*Sz5^
z?|9D#KJtmreBmqK_|6Z0@{8a6;V=IPl-GX(^B+M7N-%;Gf{=tFG+_u!IKmTwh(sbX
zQHV-3q7#Fd#3D9vh)X=;lYjscl8D44At}j7P6|?ziqxbbE$K*41~QU~%w!=e*~m@~
za*~VO<RLHl$WH+ZQi#G7p(w>DP6<j<iqe#!EafOq1u9aB%2c5$)u>JlYEp~Z)S)i*
zs80hL(ul@1p()L1P77Mniq^EDE$wJe2RhP;&UB$G-RMpadeV#D^r0{P=+6KKGKj$p
zVJO2G&Im>_iqVW=EaMo@1ST?x$xLA?)0oZ-W-^P}%waC`n9l+hvWUejVJXX4&I(qt
ziq))PE$dj%1~#&Z&1_*S+t|(ycCw4z>|rna*v|nDa)`qm;V8#A&IwL(iqo9oEay1S
z1uk-l%Ut0q*SO9NZgPv;+~F?wxX%L~@`%Sg;VI8}&I?}hir2j1E$?{G2R`zN&wSx4
z-}ufCe)5ao{NXSE2$au%0`nh12ud)56M~S0A~azLOE|(4frvyRGEs<1G@=uOn8YGB
zafnMi;*)>?5|W6-Bq1ruNKOh;l8V%%AuZ`hPX;oQiOggnE7{0S4sw!<+~grI`N&TJ
z3Q~x|6rm`^C{77VQi{@)p)BPnPX#JciON)=D%Ge?4Qf)0+SH*g^{7t+8q$cyG@&WY
zXif`S(u&r!p)KubPX{{EiOzJPE8XZ$4|>vz-t?g_{pimC1~Q1j3}Gn47|sYrGK$fR
zVJzbq&jcniiOEc1D$|(G3}!Nm+00=s^O(;97P5%NEMY0jSk4MovWnHLVJ+)e&jvQK
ziOp<bE8E!44tBDO-Rxm6``FI`4swXY9N{R(IL--9a*ETO;VkDk&jl`WiOXE!D%ZHq
z4Q_Ia+uY$U_qfjk9`cCCJmD$Nc+Lx6@`~5I;VtiY&j&v8iO+oDE8qCe4}S8C-~8b(
z{|J=de**JA1@#ath=N2>u%m6iwr$(CZQHhO+qP}nwryK~au&I$sP(x)2ud)56M~S0
zA~azLOE|(4frvyRGEs<10MUp}3}O<C*u)_&@rX|X5|W6-Bq1ruNKOh;l8V%%AuZ`h
zPX;oQiOggnE7{0S4sw!<+~grI`N&TJ3Q~x|6rm`^C{77VQi{@)p)BPnPX#JciON)=
zD%Ge?4Qf)0+SH*g^{7t+8q$cyG@&WYXif`S(u&r!p)KubPX{{EiOzJPE8XZ$4|>vz
z-t?g_{pimC1~Q1j3}Gn47|sYrGK$fRVJzbq&jcniiOEc1D$|(G3}!Nm+00=s^O(;9
z7P5%NEMY0jSk4MovWnHLVJ+)e&jvQKiOp<bE8E!44tBDO-Rxm6``FI`4swXY9N{R(
zIL--9a*ETO;VkDk&jl`WiOXE!D%ZHq4Q_Ia+uY$U_qfjk9`cCCJmD$Nc+Lx6@`~5I
z;VtiY&j&v8iO+oDE8qCe4}S8C-~8b(|M-tU1@%v0f)JEo1SbR`2}Nka5SDO+Cjt?P
zL}a26l>nj<ofyO<7O{y#T;dU*1SBL8iAh3Il98Mgq$CxoNkdxFk)8}>Bomp*LRPYo
zogCyO7rDtpUh<Kj0u-bWg(*T&icy>rl%y1;DMMMxQJxA^q!N{>LRG3!of_1n7PYBE
zUFuPv1~jA*jcGztn$esVw4@cSX+vAu(Vh-;q!XR#LRY%cogVb07rp62U;5FX0Ssgi
zgBik5hB2HGjARs}8N*n{F`fxbWD=8^!c?X)of*tz7PFbdT;?&K1uSF{i&?@_ma&`_
ztYj6dS;Jb^v7QZVWD}d&!dAAiogM6C7rWWRUiPt{103WKhdIJgj&Yn5oa7XzIm21b
zah?lY<Pw*;!d0$uog3Wb7Pq;>UG8z82R!5vk9opVp7ER)yyO+HdBa=Y@tzNS<P)Fy
z!dJfWoge(<7r*(#U;gnQfePuLzyu*E!3a(WLK2G5gdr^92u}ne5{bw}Au0hxBRVmN
zNi1R$hq%NeJ_$%jA`+8?q$DFbDM(2wQj><Xq$52U$VetKlZC8gBRe_BNiK4ehrHw?
zKLsdAAqrE3q7<VzB`8TLN>hfil%qTqs7NI$Q-!KjqdGOHNiAwqhq~0GJ`HF{BO23$
zrZl5DEoezATGNKMw4*&8=tw6z(}k{dqdPt5NiTZShraZqKLZ%ZAO<spp$ua<BN)jj
zMl*)7jAJ|#n8+k1Gli*4V>&aK$t-3whq=sSJ_}gLA{MiRr7UAPD_F@YR<nk+tYbYJ
z*vKX}vxTi}V>>(8$u4%YhrR4$KL<F-Ar5ndqa5QnCpgI|PIHE{oZ~zfxX2|gbA_v1
z<2pCE$t`Ykhr8V4J`Z@vBOddFr#$01FL=o-Uh{^xyyHC|_{b+d^M$W`<2yh2$uEBM
zhrj&eKLQojKY<BCP=XPh5QHQYp$S7+!V#VbL?jZCi9%EYh(>f`5R+KMCJu3lM|={H
zkVGUV2}wyta#E0zRHP;iX-P+VGLVr>WF`w)$wqc^kds{GCJ%YZM}7)WkU|uu2t_GI
zaY|5<Qk13)WhqB_Do~M1RHh15sYZ2bP?K8JrVe$fM|~R5kVZ772~BB6b6U`nR<x!K
zZD~h)I?$0$bfybk=|*>Y(34*DrVoATM}Gz|kU<P)2tygha7HkaQH*8`V;RSICNPmn
zOlAsGnZ|TxFq2u#W)5?i$9xvBkVPzJ2}@bVa#paCRjg(WYgxy7Hn5RRY-S5v*~WHu
zu#;WvW)FMW$9@iQkV72i2uC@_aZYfOQ=H}uXF11tE^v`cT;>W_xyE&FaFbiy<_>qc
z$9*2~kVib`2~T;(b6)V0SG?v8Z+XXiKJbxGeC7*Z`NnsC@RMKs<_~}Q$A1JWqJIJt
zgrEc?I3Wm0C_)p4u!JK#5r{}6A`^wE1Q3nr#2_ZIh)o>g5|8*KAR&oJOcIikjO3&s
zC8<bF8q$)E^kg6-naE5QvXYJL<RB-x$W0#dl8^ippdf`POc9DwjN+7_B&8@#8Ol<Q
z@>HNAm8eV=s#1;W)SxD{s7)Q}QjhvHpdpQDOcR>YjOMhUC9P;p8`{#2_H>{lo#;##
zy3&pA^q?ob=uIE`(vSWOU?77S%n*h$jNy!6B%>J37{)S=@l0SMlbFmDrZSD`%wQ(7
zn9UsKGLQKzU?GcG%o3KejODCgC97D?8rHIo^=x1xo7l`2wz7@w>|iIm*v%gHvXA{7
z;2?)M%n^=qjN_c(B&Rsd8P0N!^IYH}m$=Lou5yj*+~6j+xXm5za*z8w;31EA%oCpS
zjOV=IC9inR8{YDc_k7?ZpZLrdzVeOl{NN|Q_{|^w@{j)rR8;>2CI~?ZMsPw9l2C*u
z3}FdJcp?yyNJJ(IQ3)U#(TPD!ViB7-#3df_NkBppk(eYTB^k*{K}u4Qnlz*(9qGwH
zMlz9^EMz4c*~vjpa*>-n<Ru^ZDL_FAQJ5kWr5MF2K}kwcnlhB79ObD%MJiF5DpaK!
z)u};EYEhdy)TJKvX+T37(U>MQr5Vj>K}%ZEnl`kh9qs8rM>^4&E_9_E-RVJ3deNIc
z^ravD8NfgWF_<9?Wf;R5!AM3inlX%J9OIe5L?$trDNJP=)0x3cW-*&N%w-<)S-?UT
zv6v++Wf{v^!Ae%Knl-Ft9qZY^MmDjTEo@~Q+u6ZRcCnj1>}4POIlw^<ahM|<<rv2~
z!AVYWnlqf`9Ot>fMJ{ofD_rFo*SWz>ZgHDC+~pqkdB8&+@t7w(<r&X;!AoB8nm4@V
z9q;+TM?UeHFMQ=2-}%8$e({?>{N*425vZ8{2}}@z5{%%4AS9s(O&G!wj_^bvB9Vwp
z6rvJ9G@=uOn8YGBafnMi;*)@cBqA|MNJ=u2lY*3_A~k79OFGh%fsAA#Gg-(=HnNk0
zoa7=mdB{sX@>76<6rwOiC`vJkQ-YF|qBLbFOF7C@fr?b3GF7NbHL6pCn$)5;b*M`{
z>eGORG@>z0Xi77h(}I??qBU)3OFP=rfsS;dGhOIPH@ee<p7f$OedtR+`ZIum3}P@t
z7|Jk)GlG$fVl-nI%Q(g}fr(6FGE<n!G^R6ynapA~bC}CK=Cgo>EMhTBSjsY%vx1eZ
zVl``6%R1JxfsJfpGh5ioHny{ao$O*ad)Ui9_H%%P9O5uXILa}ObApqc;xuPC%Q?<-
zfs0(?GFQ0DHLi1mo800yceu+v?(=|$JmN7=c*--L^MaSW;x%u0%RAolfscIRGhg`1
zH@@?OpZwxCfB4Hk{v%Lv{S%lV1SJ^32|-9g5t=ZBB^=?2Ktv)DnJ7dhfM`S~1~G|6
zY~m1?c*G|G2}wj^l8}^SBqs$aNkwYXkd}0$Cj%MDL}s#(m26}u2RX?_Zt{?qeB`G9
z1t~;ficpkd6sH6wDMe|@P?mC(rveqJL}jW_m1<O{1~sWgZR${$deo-@4QWJUn$VPH
zG^YhEX+>+=(3W<zrvn}7L}$9tm2PyW2R-RUZ~D-ee)MMm0~y3%hA@<23}*x*8O3PE
zFqUzQX95$M#AK#0m1#_81~Zw(Z00bRdCX@43t7Zsmavp%EN2BPS;cDBu$FbKX9FAA
z#Addzm2GTi2RqrtZuYR3eeCA|2RX!Hj&PJ?9OnclImKztaF%nN=K>eG#AU8<m1|t*
z1~<9IZSHWFd)(&%4|&96p74}sJm&>3dBtnq@RoPH=K~-4#Am+nm2Z6K2S546Z~pL?
zfBZ+F68a}FK?q7Pf)j#}gd#Ly2unD^6M=|CA~I2kN&wM_P7Goai`c{=F7b#@0uqvl
z#3Ugp$w*ELQj&_)q#-ToNKXbbl8MY@AuHL)P7ZRCi`?WPFZsw%0SZ!x!W5w>#VAe*
zN>Yl_l%Xu;C{G0{Qi;k`p(@p=P7P{Oi`vwoF7>ES0~*qZ#x$WR&1g;wTGEQvw4p8S
zXio<^(uvM=p)1|!P7iw0i{A91Fa7Ax00uIM!3<$2!x+v8Mly=gjA1O}7|#SIGKtAd
zVJg#@&J1QUi`mR!F7uer0v57}#Vlbd%UI3|R<erKtYIzdSkDGFvWd-XVJq9%&JK36
zi{0#DFZ<Zf0S<DA!yMr#$2iUjPI8LVoZ&3zIL`$xa*4}a;VRd-&JAvIi`(4cF88?4
z10M2-$2{RF&v?!YUh<09yx}eHc+Uqu@`=xU;Va+x&JTX_i{Jd=FaP+DKqd80V1f{o
zU<4-wAqhoj!Vs2lgeL+Ki9}?g5S0L;5uF&sBo?uWLtNq!p9CZ%5s67cQj(FJ6r>~-
zsYydx(vhAFWF!-r$wF4Lk)0gmBp12KLtgTcp8^!55QQm1QHoKV5|pGAr71&M%2A#Q
zRHPD>sX|q%QJospq!zWQLtW}op9VCf5shg=Q<~A77PO=lt!YDB+R>g4bfgoV=|We!
z(VZUjq!+#ELtpyQp8*VH5Q7=QP=+y_5sYLMqZz|k#xb4=Ok@(1nZi`2F`XIAWEQiT
z!(8Sup9L&r5sO*EQkJot6|7_xt69TZ*0G)qY-AIg*}_(~v7H_4WEZ>H!(R5Wp937^
z5QjO!QI2t(6P)A}r#Zt}&T*a#T;vj$xx!Vhah)67<QBKN!(Hxip9ehT5s!JoQ=ajh
z7rf*ZuX)2;-tnFfeB=|K`NCJe@tq(1<QKpB!(aaKAAw5gpTGnmD8UF$2tpEy(1al@
z;RsIzA`*$nL?J2xL?b#eh)FDB6Nk9OBR&a8NFoxGgrp=RIVng<DpHe%w4@_F8OTT`
zGLwa@WFtE{$Vo18lZU+IBR>TwNFfSSgrXFqI3*}aDN0j@vXrAd6{tuhDpQ53RHHgI
zs7WnqQ-`|LqdpC2NFy54gr+p3IW1^OD_YZrwzQ)?9q33WI@5)&bfY^x=t(bn(}%wF
zqdx-}$RGwYgrN*$I3pOzC`L1ev5aFp6PU;(CNqVpOk+ATn8_?=Gl#j%V?GO5$RZZA
zgrzKFIV)JnDps?GwX9=38`#JuHnWATY-2k+*vT$-vxmLxV?PHt$RQ4MgrgkeI43yC
zDNb{Svz+5R7r4kJE^~#eT;n=7xXCSUbBDX!<30~~$Ri%}gr_{?IWKt0D_--4x4h#$
zANa^8KJ$gIeB(Pm_{lGR^M}9u<39qG)<1y>LQsMcoDhU06rl-2Si%vW2t*_jk%>Z7
z0*FR*Vi1#9#3l}LiAQ`AkdQ<qCJ9MNMsiY+l2oK74QWY7dNPoaOk^etS;<Cra*&f;
z<R%Y!$wz()P>@0trU*qTMsZ3|l2VkW3}q=tc`8tmN>ru_RjEdGYEY9})TRz~sYiVp
z(2zznrU^}HMsr%wl2){)4Q**hdpgjOPIRUVUFk-5deD<z^rjDe=|_JCFpxnEW(Y$W
z#&AY3l2MFi3}YF`cqTBBNla!6Q<=teW-yak%w`UAna6w<u#iP8W(iAK#&TA$l2xo`
z4QpA)dN#0;O>AZhTiM2TcCeFO>}C&p*~fkkaF9bB<_JeQ#&J$?l2e@K3}-pVc`k5~
zOI+p(SGmS@Zg7)Z+~y8<xyOAT@Q_D5<_S-E#&cfql2^Ru4R3kJdp_`yPkiPJU-`y&
ze(;lD{N@jT`Nw|*Dx-e_6NI1yBRC-lNhm@ShOmSqJQ0XUBq9@qs00v==)@o<v4~9^
z;u4SeBp@M)NK6uvl8oe}ASJ0tO&ZdYj`U<8Bbmrd7P69!?BpOPxyVf(@{*7I6rdo5
zC`=KGQjFr1pd_UzO&Q8kj`CEXB9*926{=E=>eQenwWv)U>QayTG@v1kXiO8D(v0S`
zpe3znO&i+Mj`nn*Bc13>7rN4o?)0E1z35FJ`qGd73}7IG7|alcGK}GjU?ig$%^1cq
zj`2)jB9oZR6s9tb>C9jzvzW~s<}#1@EMOsvSj-ZZvW(@dU?r<q%^KFSj`eI{Bb(UF
z7PhjD?d)JDyV%Vh_Og%t9N-{_ILr}_a*X4g;3TIw%^A*ej`LjLBA2+#6|QoP>)hZb
zx46w6?sAX&Jm4XZc+3-?@{H%a;3cnk%^TkGj`w`vBcJ%p7rye1@BH8=zxd4`{_>Ci
z2vk=81SSYU2}W>25Ry=YCJbQ-M|dI-kw`=)3Q-9l8qtYCOkxq6IK(9$@ku~J5|NlB
zBqbTiNkK|dk(xB5B^~L>Kt?i=nJi=_8`;T0PI8f(Jme)G`6)m_3Q?FM6r~u&DM3j}
zQJON8r5xp{Kt(E1nJQGJ8r7*mO=?k_I@F~e^=Uvu8qt_0G^H8MX+cX`(V8~2r5)|*
zKu0>!nJ#pt8{O$aPkPatKJ=v@{TaYO1~Hf+3}qO@8No<KF`6-qWgO#~z(gi7nJG+V
z8q=A<OlC2gIm~4q^I5<`7O|KmEM*zXS;0zHv6?lkWgY9;z(zK)nJsK(8{65zPIj@I
zJ?v#4`#Hct4sn<x9OW3tIl)Ozahfxn<s9d^z(p=`nJZl78rQkOO>S|UJKW_S_j$lW
z9`TqbJmneBdBICw@tQZh<sI+&z(+punJ;|h8{hfCPk!;6Km6q%{}HI1{s~MFf)b42
zgdilL2u&Em5{~dhAR>{7OcbILKs2HggP6o3HgSkcJmM1&C}6;&Q2&=E;J}W+&fo#T
z&4O1#5|NlBBqbTiNkK|dk(xB5B^~L>Kt?i=nJi=_8`;T0PI8f(Jme)G`6)m_3Q?FM
z6r~u&DM3j}QJON8r5xp{Kt(E1nJQGJ8r7*mO=?k_I@F~e^=Uvu8qt_0G^H8MX+cX`
z(V8~2r5)|*Ku0>!nJ#pt8{O$aPkPatKJ=v@{TaYO1~Hf+3}qO@8No<KF`6-qWgO#~
zz(gi7nJG+V8q=A<OlC2gIm~4q^I5<`7O|KmEM*zXS;0zHv6?lkWgY9;z(zK)nJsK(
z8{65zPIj@IJ?v#4`#Hct4sn<x9OW3tIl)Ozahfxn<s9d^z(p=`nJZl78rQkOO>S|U
zJKW_S_j$lW9`TqbJmneBdBICw@tQZh<sI+&z(+punJ;|h8{hfCPk!;6Km6q%{}HIX
zSs*Y$2ud)56M~S0A~azLOE|(4frvyRGEs<10MUp}3}O<C*u)_&@rX|X5|W6-Bq1ru
zNKOh;l8V%%AuZ`hPX;oQiOggnE7{0S4sw!<+~grI`N&TJ3Q~x|6rm`^C{77VQi{@)
zp)BPnPX#JciON)=D%Ge?4Qf)0+SH*g^{7t+8q$cyG@&WYXif`S(u&r!p)KubPX{{E
ziOzJPE8XZ$4|>vz-t?g_{pimC1~Q1j3}Gn47|sYrGK$fRVJzbq&jcniiOEc1D$|(G
z3}!Nm+00=s^O(;97P5%NEMY0jSk4MovWnHLVJ+)e&jvQKiOp<bE8E!44tBDO-Rxm6
z``FI`4swXY9N{R(IL--9a*ETO;VkDk&jl`WiOXE!D%ZHq4Q_Ia+uY$U_qfjk9`cCC
zJmD$Nc+Lx6@`~5I;VtiY&j&v8iO+oDE8qCe4}S8C-~8b(|M-tU74%PFf)JEo1SbR`
z2}Nka5SDO+Cjt?PL}a26l>nj<ofyO<7O{y#T;dU*1SBL8iAh3Il98Mgq$CxoNkdxF
zk)8}>Bomp*LRPYoogCyO7rDtpUh<Kj0u-bWg(*T&icy>rl%y1;DMMMxQJxA^q!N{>
zLRG3!of_1n7PYBEUFuPv1~jA*jcGztn$esVw4@cSX+vAu(Vh-;q!XR#LRY%cogVb0
z7rp62U;5FX0SsgigBik5hB2HGjARs}8N*n{F`fxbWD=8^!c?X)of*tz7PFbdT;?&K
z1uSF{i&?@_ma&`_tYj6dS;Jb^v7QZVWD}d&!dAAiogM6C7rWWRUiPt{103WKhdIJg
zj&Yn5oa7XzIm21bah?lY<Pw*;!d0$uog3Wb7Pq;>UG8z82R!5vk9opVp7ER)yyO+H
zdBa=Y@tzNS<P)Fy!dJfWoge(<7r*(#U;gnQfhy{szyu*E!3a(WLK2G5gdr^92u}ne
z5{bw}Au0hxBRVmNNi1R$hq%NeJ_$%jA`+8?q$DFbDM(2wQj><Xq$52U$VetKlZC8g
zBRe_BNiK4ehrHw?KLsdAAqrE3q7<VzB`8TLN>hfil%qTqs7NI$Q-!KjqdGOHNiAwq
zhq~0GJ`HF{BO23$rZl5DEoezATGNKMw4*&8=tw6z(}k{dqdPt5NiTZShraZqKLZ%Z
zAO<spp$ua<BN)jjMl*)7jAJ|#n8+k1Gli*4V>&aK$t-3whq=sSJ_}gLA{MiRr7UAP
zD_F@YR<nk+tYbYJ*vKX}vxTi}V>>(8$u4%YhrR4$KL<F-Ar5ndqa5QnCpgI|PIHE{
zoZ~zfxX2|gbA_v1<2pCE$t`Ykhr8V4J`Z@vBOddFr#$01FL=o-Uh{^xyyHC|_{b+d
z^M$W`<2yh2$uEBMhrj&eKLS<KKY<BCP=XPh5QHQYp$S7+!V#VbL?jZCi9%EYh(>f`
z5R+KMCJu3lM|={HkVGUV2}wyta#E0zRHP;iX-P+VGLVr>WF`w)$wqc^kds{GCJ%YZ
zM}7)WkU|uu2t_GIaY|5<Qk13)WhqB_Do~M1RHh15sYZ2bP?K8JrVe$fM|~R5kVZ77
z2~BB6b6U`nR<x!KZD~h)I?$0$bfybk=|*>Y(34*DrVoATM}Gz|kU<P)2tygha7Hka
zQH*8`V;RSICNPmnOlAsGnZ|TxFq2u#W)5?i$9xvBkVPzJ2}@bVa#paCRjg(WYgxy7
zHn5RRY-S5v*~WHuu#;WvW)FMW$9@iQkV72i2uC@_aZYfOQ=H}uXF11tE^v`cT;>W_
zxyE&FaFbiy<_>qc$9*2~kVib`2~T;(b6)V0SG?v8Z+XXiKJbxGeC7*Z`NnsC@RMKs
z<_~}Q$A1K>tbYO%grEc?I3Wm0C_)p4u!JK#5r{}6A`^wE1Q3nr#2_ZIh)o>g5|8*K
zAR&oJOcIikjO3&sC8<bF8q$)E^kg6-naE5QvXYJL<RB-x$W0#dl8^ippdf`POc9Dw
zjN+7_B&8@#8Ol<Q@>HNAm8eV=s#1;W)SxD{s7)Q}QjhvHpdpQDOcR>YjOMhUC9P;p
z8`{#2_H>{lo#;##y3&pA^q?ob=uIE`(vSWOU?77S%n*h$jNy!6B%>J37{)S=@l0SM
zlbFmDrZSD`%wQ(7n9UsKGLQKzU?GcG%o3KejODCgC97D?8rHIo^=x1xo7l`2wz7@w
z>|iIm*v%gHvXA{7;2?)M%n^=qjN_c(B&Rsd8P0N!^IYH}m$=Lou5yj*+~6j+xXm5z
za*z8w;31EA%oCpSjOV=IC9inR8{YDc_k7?ZpZLrdzVeOl{NN|Q_{|^w@{j)rR7L*;
zCI~?ZMsPw9l2C*u3}FdJcp?yyNJJ(IQ3)U#(TPD!ViB7-#3df_NkBppk(eYTB^k*{
zK}u4Qnlz*(9qGwHMlz9^EMz4c*~vjpa*>-n<Ru^ZDL_FAQJ5kWr5MF2K}kwcnlhB7
z9ObD%MJiF5DpaK!)u};EYEhdy)TJKvX+T37(U>MQr5Vj>K}%ZEnl`kh9qs8rM>^4&
zE_9_E-RVJ3deNIc^ravD8NfgWF_<9?Wf;R5!AM3inlX%J9OIe5L?$trDNJP=)0x3c
zW-*&N%w-<)S-?UTv6v++Wf{v^!Ae%Knl-Ft9qZY^MmDjTEo@~Q+u6ZRcCnj1>}4PO
zIlw^<ahM|<<rv2~!AVYWnlqf`9Ot>fMJ{ofD_rFo*SWz>ZgHDC+~pqkdB8&+@t7w(
z<r&X;!AoB8nm4@V9q;+TM?UeHFMQ=2-}%8$e({?>{N*425vZ#E2}}@z5{%%4AS9s(
zO&G!wj_^bvB9Vwp6rvJ9G@=uOn8YGBafnMi;*)@cBqA|MNJ=u2lY*3_A~k79OFGh%
zfsAA#Gg-(=HnNk0oa7=mdB{sX@>76<6rwOiC`vJkQ-YF|qBLbFOF7C@fr?b3GF7Nb
zHL6pCn$)5;b*M`{>eGORG@>z0Xi77h(}I??qBU)3OFP=rfsS;dGhOIPH@ee<p7f$O
zedtR+`ZIum3}P@t7|Jk)GlG$fVl-nI%Q(g}fr(6FGE<n!G^R6ynapA~bC}CK=Cgo>
zEMhTBSjsY%vx1eZVl``6%R1JxfsJfpGh5ioHny{ao$O*ad)Ui9_H%%P9O5uXILa}O
zbApqc;xuPC%Q?<-fs0(?GFQ0DHLi1mo800yceu+v?(=|$JmN7=c*--L^MaSW;x%u0
z%RAolfscIRGhg`1H@@?OpZwxCfB4Hk{v%K|{S%lV1SJ^32|-9g5t=ZBB^=?2Ktv)D
znJ7dhfM`S~1~G|6Y~m1?c*G|G2}wj^l8}^SBqs$aNkwYXkd}0$Cj%MDL}s#(m26}u
z2RX?_Zt{?qeB`G91t~;ficpkd6sH6wDMe|@P?mC(rveqJL}jW_m1<O{1~sWgZR${$
zdeo-@4QWJUn$VPHG^YhEX+>+=(3W<zrvn}7L}$9tm2PyW2R-RUZ~D-ee)MMm0~y3%
zhA@<23}*x*8O3PEFqUzQX95$M#AK#0m1#_81~Zw(Z00bRdCX@43t7Zsmavp%EN2BP
zS;cDBu$FbKX9FAA#Addzm2GTi2RqrtZuYR3eeCA|2RX!Hj&PJ?9OnclImKztaF%nN
z=K>eG#AU8<m1|t*1~<9IZSHWFd)(&%4|&96p74}sJm&>3dBtnq@RoPH=K~-4#Am+n
zm2Z6K2S546Z~pL?fBZ+F>iQ=zK?q7Pf)j#}gd#Ly2unD^6M=|CA~I2kN&wM_P7Goa
zi`c{=F7b#@0uqvl#3Ugp$w*ELQj&_)q#-ToNKXbbl8MY@AuHL)P7ZRCi`?WPFZsw%
z0SZ!x!W5w>#VAe*N>Yl_l%Xu;C{G0{Qi;k`p(@p=P7P{Oi`vwoF7>ES0~*qZ#x$WR
z&1g;wTGEQvw4p8SXio<^(uvM=p)1|!P7iw0i{A91Fa7Ax00uIM!3<$2!x+v8Mly=g
zjA1O}7|#SIGKtAdVJg#@&J1QUi`mR!F7uer0v57}#Vlbd%UI3|R<erKtYIzdSkDGF
zvWd-XVJq9%&JK36i{0#DFZ<Zf0S<DA!yMr#$2iUjPI8LVoZ&3zIL`$xa*4}a;VRd-
z&JAvIi`(4cF88?410M2-$2{RF&v?!YUh<09yx}eHc+Uqu@`=xU;Va+x&JTX_i{Jd=
zFaP+DKsEGFV1f{oU<4-wAqhoj!Vs2lgeL+Ki9}?g5S0L;5uF&sBo?uWLtNq!p9CZ%
z5s67cQj(FJ6r>~-sYydx(vhAFWF!-r$wF4Lk)0gmBp12KLtgTcp8^!55QQm1QHoKV
z5|pGAr71&M%2A#QRHPD>sX|q%QJospq!zWQLtW}op9VCf5shg=Q<~A77PO=lt!YDB
z+R>g4bfgoV=|We!(VZUjq!+#ELtpyQp8*VH5Q7=QP=+y_5sYLMqZz|k#xb4=Ok@(1
znZi`2F`XIAWEQiT!(8Sup9L&r5sO*EQkJot6|7_xt69TZ*0G)qY-AIg*}_(~v7H_4
zWEZ>H!(R5Wp937^5QjO!QI2t(6P)A}r#Zt}&T*a#T;vj$xx!Vhah)67<QBKN!(Hxi
zp9ehT5s!JoQ=ajh7rf*ZuX)2;-tnFfeB=|K`NCJe@tq(1<QKpB!(aaKAAxG>pTGnm
zD8UF$2tpEy(1al@;RsIzA`*$nL?J2xL?b#eh)FDB6Nk9OBR&a8NFoxGgrp=RIVng<
zDpHe%w4@_F8OTT`GLwa@WFtE{$Vo18lZU+IBR>TwNFfSSgrXFqI3*}aDN0j@vXrAd
z6{tuhDpQ53RHHgIs7WnqQ-`|LqdpC2NFy54gr+p3IW1^OD_YZrwzQ)?9q33WI@5)&
zbfY^x=t(bn(}%wFqdx-}$RGwYgrN*$I3pOzC`L1ev5aFp6PU;(CNqVpOk+ATn8_?=
zGl#j%V?GO5$RZZAgrzKFIV)JnDps?GwX9=38`#JuHnWATY-2k+*vT$-vxmLxV?PHt
z$RQ4MgrgkeI43yCDNb{Svz+5R7r4kJE^~#eT;n=7xXCSUbBDX!<30~~$Ri%}gr_{?
zIWKt0D_--4x4h#$ANa^8KJ$gIeB(Pm_{lGR^M}9u<39q`(m#O-LQsMcoDhU06rl-2
zSi%vW2t*_jk%>Z70*FR*Vi1#9#3l}LiAQ`AkdQ<qCJ9MNMsiY+l2oK74QWY7dNPoa
zOk^etS;<Cra*&f;<R%Y!$wz()P>@0trU*qTMsZ3|l2VkW3}q=tc`8tmN>ru_RjEdG
zYEY9})TRz~sYiVp(2zznrU^}HMsr%wl2){)4Q**hdpgjOPIRUVUFk-5deD<z^rjDe
z=|_JCFpxnEW(Y$W#&AY3l2MFi3}YF`cqTBBNla!6Q<=teW-yak%w`UAna6w<u#iP8
zW(iAK#&TA$l2xo`4QpA)dN#0;O>AZhTiM2TcCeFO>}C&p*~fkkaF9bB<_JeQ#&J$?
zl2e@K3}-pVc`k5~OI+p(SGmS@Zg7)Z+~y8<xyOAT@Q_D5<_S-E#&cfql2^Ru4R3kJ
zdp_`yPkiPJU-`y&e(;lD{N@jT`Nw|*s;z$l6NI1yBRC-lNhm@ShOmSqJQ0XUBq9@q
zs00v==)@o<v4~9^;u4SeBp@M)NK6uvl8oe}ASJ0tO&ZdYj`U<8Bbmrd7P69!?BpOP
zxyVf(@{*7I6rdo5C`=KGQjFr1pd_UzO&Q8kj`CEXB9*926{=E=>eQenwWv)U>QayT
zG@v1kXiO8D(v0S`pe3znO&i+Mj`nn*Bc13>7rN4o?)0E1z35FJ`qGd73}7IG7|alc
zGK}GjU?ig$%^1cqj`2)jB9oZR6s9tb>C9jzvzW~s<}#1@EMOsvSj-ZZvW(@dU?r<q
z%^KFSj`eI{Bb(UF7PhjD?d)JDyV%Vh_Og%t9N-{_ILr}_a*X4g;3TIw%^A*ej`LjL
zBA2+#6|QoP>)hZbx46w6?sAX&Jm4XZc+3-?@{H%a;3cnk%^TkGj`w`vBcJ%p7rye1
z@BH8=zxd4`{_>Ci2vkS^1SSYU2}W>25Ry=YCJbQ-M|dI-kw`=)3Q-9l8qtYCOkxq6
zIK(9$@ku~J5|NlBBqbTiNkK|dk(xB5B^~L>Kt?i=nJi=_8`;T0PI8f(Jme)G`6)m_
z3Q?FM6r~u&DM3j}QJON8r5xp{Kt(E1nJQGJ8r7*mO=?k_I@F~e^=Uvu8qt_0G^H8M
zX+cX`(V8~2r5)|*Ku0>!nJ#pt8{O$aPkPatKJ=v@{TaYO1~Hf+3}qO@8No<KF`6-q
zWgO#~z(gi7nJG+V8q=A<OlC2gIm~4q^I5<`7O|KmEM*zXS;0zHv6?lkWgY9;z(zK)
znJsK(8{65zPIj@IJ?v#4`#Hct4sn<x9OW3tIl)Ozahfxn<s9d^z(p=`nJZl78rQkO
zO>S|UJKW_S_j$lW9`TqbJmneBdBICw@tQZh<sI+&z(+punJ;|h8{hfCPk!;6Km6q%
z{}HIJ{s~MFf)b42gdilL2u&Em5{~dhAR>{7OcbILKs2HggP6o3HgSkcJmQmpgd`#{
zNk~dEl9Pgzq#`wGNJ~1>lYxw6A~RXYN;a~SgPi0dH+jfQKJrt5f)t`KMJP%!ic^A;
zl%h0cC`&oYQ-O+9qB2#eN;RregPPQ$Hg%{=J?hhdhBTrvO=wCpn$v=ow4ya_XiGcV
z(}9k3qBC9SN;kUGgP!!FH+|?!Kl(F(fed0WLm0|1hBJbZjAArn7|S@uGl7XrVlq>h
z$~2}kgPF`?HglNEJm#~2g)Cw*OIXS>ma~GDtYS55Sj#%rvw@9lVl!LV$~LyMgPrVR
zH+$I2KK65fgB;>8M>xtcj&p*OoZ>WRILkTCbAgLo;xbpb$~CTYgPYvqHg~woJ?`^>
zhdkmjPk72Rp7Vm2yy7))c*{H9^MQ|i;xk|P$~V6AgP;83H-GrcKmH?7J^d4yAOs~C
z!3jY~LJ^uUge4r|i9kdm5t%4NC4gu|Ck8QzMQq{_mw3b{0SQS&Vv>-QWF#jADM>|Y
z(vX&Pq$dLz$wX$dkd<s?CkHvnMQ-wtmwe=>00k*TVTw?cViczYB`HN|%21Yal&1m}
zsYGR}P?c&_rv^2tMQ!R(mwMEv0S#$HW17&EW;CY-Eont-+R&DEw5J0d=|pF`(3Ng<
zrw2XhMQ{4hmwxnT00SAsV1_W1VGL&kBN@eL#xRy~jAsH9nZ#tKFqLUcX9hEw#cbv<
zmwC)*0Sj5gVwSL!Wh`d}D_O;A*07d!tY-ro*~DhHu$66WX9qjk#cuYnmwoK#00%k5
zVUBQ=V;tuMCppDw&Ty7<oaX`;xx{6zaFuIZ=LR>q#cl3zmwVjj0S|e^W1jGoXFTTx
zFL}jl-td-pyypWS`NU_w@Re_T=LbLe#c%%bmw)_6p!)hJFhK}PFoF|;kc1*MVF*h&
z!V`grL?SX#h)Mv_h)xV*5{uZxAujQVPXZE>h{PlzDalAq3R04a)TALT=}1ooGLnhR
zWFafr$W9J&l8fBrAusvJPXP*2h{6=1D8(pF2})9m(v+br<tR@DDpHBcRG})>s7?)P
zQj6Nup)U2PPXij#h{iObDa~k33tG~O*0iB5?PyO2I?{>GbfGKV=uQuM(u>~op)dXD
z&j1E8h`|hDD8m@e2u3oB(Trg%;~38bCNhc1Okpb1n9dAlGK<;FVJ`ES&jJ>*h{Y^n
zDa%;S3Rbd;)vRGH>sZeQHnNG$Y+)<g*v<}ivWwm9VK4jG&jAi{h{GJ=D91R?2~Ki~
z)12Wf=Qz&=E^>*>T;VF$xXul3a*NyC;V$>M&jTLvh{rtPDbIM$3tsYy*Sz5^?|9D#
zKJtmreBmqK_|6Z0^6P&L)6=#P2mk=UPPT2^wrv|X+qP}nwr$(CZP%uqhxvrz7r*(#
zU;Ys&z<&Z0grEc?I3Wm0C_)p4u!JK#5r{}6A`^wEL?b#eh)FDB6Nk9OBR&a8NFoxG
zgrp=RIVng<DpHe%w4@_F8OTT`GLwa@WFtE{$Vo18lZU+IBR>TwNFfSSgrXFqI3*}a
zDN0j@vXrAd6{tuhDpQ53RHHgIs7WnqQ-`|LqdpA?pdpQDOcR>YjOMhUC9P;p8`{#2
z_H>{lo#;##y3&pA^q?ob=uIE`(vSWOU?77S%n*h$jNy!6B%>J37{)S=@l0SMlbFmD
zrZSD`%wQ(7n9UsKGLQKzU?GcG%o3KejODCgC97D?8rHIo^=x1xo7l`2wz7@w>|iIm
z*v%gHvXA{7;2?)M%n^=qjN_c(B&Rsd8P0N!^IYH}m$=Lou5yj*+~6j+xXm5za*z8w
z;31EA%oCpSjOV=IC9inR8{YDc_k7?ZpZLrdzVeOl{NN}5@r&R5;V=IP)X;wd6NI1y
zBRC-lNhm@ShOmSqJQ0XUBq9@qs6-<=F^EYlViSkB#3MclNJt_QlZ2!sBRMHZNh(s4
zhP0$3JsHSICNh(StYjlQImk&aa+8O=<Rd=?C`cg+Q-q=vqc|lfNhwNGhO(5SJQb)&
zB`Q;es#K#oHK<7~YEy^0)T2HP2%sU2XiO8D(v0S`pe3znO&i+Mj`nn*Bc13>7rN4o
z?)0E1z35FJ`qGd73}7IG7|alcGK}GjU?ig$%^1cqj`2)jB9oZR6s9tb>C9jzvzW~s
z<}#1@EMOsvSj-ZZvW(@dU?r<q%^KFSj`eI{Bb(UF7PhjD?d)JDyV%Vh_Og%t9N-{_
zILr}_a*X4g;3TIw%^A*ej`LjLBA2+#6|QoP>)hZbx46w6?sAX&Jm4XZc+3-?@{H%a
z;3cnk%^TkGj`w`vBcJ%p7rye1@BH8=|M838{NXSE2-L`b0uzLw1S2>h2uUbH6Na#a
zBRmm^NF*W?g{VX$Ix&bzEMgOfxWpqq2}npH5|f0aBqKQ~NJ%PElZLdUBRv_&NG39q
zg{)*FJ2}WnE^?EHyyPQ41t>@%3R8rl6r(sLC`l<wQ--pXqdXO;NF^#$g{oAeIyI<C
zEoxJTy40gS4G5qijc800n$nEsw4f!eXiXd1(vJ3Ypd+2=Oc%P+jqdcIC%x!RANtad
z{tRFsgBZ*ZhBA!dj9?_A7|j^QGLG>~U?P*4%oL_Fjp@u_CbO8$9Og2Q`7B@|i&)GO
zma>fHtY9UpSj`&NvX1p^U?ZE@%oet?jqU7UC%f3q9`>@2{T$#Rhd9g;j&h9SoZuv<
zIL#T(a*p#{;3Ai}%oVP3jqBXtCbziF9qw|E`#j(wk9f=zp7M<6yx=9Tc+DH$@{ad>
z;3J>-%oo1$jqm*6C;#z_-~8b({|MCBe*zPPpadg0AqYt*LKB9tgd;o=h)5(N6NRWm
zBRVmNNi1R$hq%NeJ_$%jA`+8?q$DFbDM(2wQj><Xq$52U$VetKlZC8gBRe_BNiK4e
zhrHw?KLsdAAqrE3q7<VzB`8TLN>hfil%qTqs7NI$Q-!KjqdGOHNiAwqhq~0GJ`D(<
zA&qEE6PnVD=Cq(Ct!Paf+R~2pbf6=h=u8*7(v9x)peMcPO&|KwkNyl`AcGjp5QZ|0
z;f!D;qZrK?#xjoaOkg6Dn9LNWGL7lXU?#Je%^c=3kNGTMA&Xed5|*-z<*Z;Ot60q%
z*0PTEY+xgs*vuBTvW@NRU?;oS%^vo$kNq6rAcr{25sq?<<DB3mr#Q_S&T@|PT;L*?
zxXcx<a*gZU;3l`Y%^mJ?kNZ5}A&+>>6Q1&n=e*!0uXxQH-tvz3eBdLW_{<l+@{RBO
z;3xm_i{Jd=FaHSC#D4-4grEc?I3Wm0C_)p4u!JK#5r{}6A`^wEL?b#eh)FDB6Nk9O
zBR&a8NFoxGgrp=RIVng<DpHe%w4@_F8OTT`GLwa@WFtE{$Vo18lZU+IBR>TwNFfSS
zgrXFqI3*}aDN0j@vXrAd6{tuhDpQ53RHHgIs7WnqQ-`|LqdpA?pdpQDOcR>YjOMhU
zC9P;p8`{#2_H>{lo#;##y3&pA^q?ob=uIE`(vSWOU?77S%n*h$jNy!6B%>J37{)S=
z@l0SMlbFmDrZSD`%wQ(7n9UsKGLQKzU?GcG%o3KejODCgC97D?8rHIo^=x1xo7l`2
zwz7@w>|iIm*v%gHvXA{7;2?)M%n^=qjN_c(B&Rsd8P0N!^IYH}m$=Lou5yj*+~6j+
zxXm5za*z8w;31EA%oCpSjOV=IC9inR8{YDc_k7?ZpZLrdzVeOl{NN}5@r&R5;V=IP
z)YN|h6NI1yBRC-lNhm@ShOmSqJQ0XUBq9@qs6-<=F^EYlViSkB#3MclNJt_QlZ2!s
zBRMHZNh(s4hP0$3JsHSICNh(StYjlQImk&aa+8O=<Rd=?C`cg+Q-q=vqc|lfNhwNG
zhO(5SJQb)&B`Q;es#K#oHK<7~YEy^0)T2HP2%sU2XiO8D(v0S`pe3znO&i+Mj`nn*
zBc13>7rN4o?)0E1z35FJ`qGd73}7IG7|alcGK}GjU?ig$%^1cqj`2)jB9oZR6s9tb
z>C9jzvzW~s<}#1@EMOsvSj-ZZvW(@dU?r<q%^KFSj`eI{Bb(UF7PhjD?d)JDyV%Vh
z_Og%t9N-{_ILr}_a*X4g;3TIw%^A*ej`LjLBA2+#6|QoP>)hZbx46w6?sAX&Jm4XZ
zc+3-?@{H%a;3cnk%^TkGj`w`vBcJ%p7rye1@BH8=|M838{NXSE2-M7f0uzLw1S2>h
z2uUbH6Na#aBRmm^NF*W?g{VX$Ix&bzEMgOfxWpqq2}npH5|f0aBqKQ~NJ%PElZLdU
zBRv_&NG39qg{)*FJ2}WnE^?EHyyPQ41t>@%3R8rl6r(sLC`l<wQ--pXqdXO;NF^#$
zg{oAeIyI<CEoxJTy40gS4G5qijc800n$nEsw4f!eXiXd1(vJ3Ypd+2=Oc%P+jqdcI
zC%x!RANtad{tRFsgBZ*ZhBA!dj9?_A7|j^QGLG>~U?P*4%oL_Fjp@u_CbO8$9Og2Q
z`7B@|i&)GOma>fHtY9UpSj`&NvX1p^U?ZE@%oet?jqU7UC%f3q9`>@2{T$#Rhd9g;
zj&h9SoZuv<IL#T(a*p#{;3Ai}%oVP3jqBXtCbziF9qw|E`#j(wk9f=zp7M<6yx=9T
zc+DH$@{ad>;3J>-%oo1$jqm*6C;#z_-~8b({|MCFe*zPPpadg0AqYt*LKB9tgd;o=
zh)5(N6NRWmBRVmNNi1R$hq%NeJ_$%jA`+8?q$DFbDM(2wQj><Xq$52U$VetKlZC8g
zBRe_BNiK4ehrHw?KLsdAAqrE3q7<VzB`8TLN>hfil%qTqs7NI$Q-!KjqdGOHNiAwq
zhq~0GJ`D(<A&qEE6PnVD=Cq(Ct!Paf+R~2pbf6=h=u8*7(v9x)peMcPO&|KwkNyl`
zAcGjp5QZ|0;f!D;qZrK?#xjoaOkg6Dn9LNWGL7lXU?#Je%^c=3kNGTMA&Xed5|*-z
z<*Z;Ot60q%*0PTEY+xgs*vuBTvW@NRU?;oS%^vo$kNq6rAcr{25sq?<<DB3mr#Q_S
z&T@|PT;L*?xXcx<a*gZU;3l`Y%^mJ?kNZ5}A&+>>6Q1&n=e*!0uXxQH-tvz3eBdLW
z_{<l+@{RBO;3xm_i{Jd=FaHSC!hZr2grEc?I3Wm0C_)p4u!JK#5r{}6A`^wEL?b#e
zh)FDB6Nk9OBR&a8NFoxGgrp=RIVng<DpHe%w4@_F8OTT`GLwa@WFtE{$Vo18lZU+I
zBR>TwNFfSSgrXFqI3*}aDN0j@vXrAd6{tuhDpQ53RHHgIs7WnqQ-`|LqdpA?pdpQD
zOcR>YjOMhUC9P;p8`{#2_H>{lo#;##y3&pA^q?ob=uIE`(vSWOU?77S%n*h$jNy!6
zB%>J37{)S=@l0SMlbFmDrZSD`%wQ(7n9UsKGLQKzU?GcG%o3KejODCgC97D?8rHIo
z^=x1xo7l`2wz7@w>|iIm*v%gHvXA{7;2?)M%n^=qjN_c(B&Rsd8P0N!^IYH}m$=Lo
zu5yj*+~6j+xXm5za*z8w;31EA%oCpSjOV=IC9inR8{YDc_k7?ZpZLrdzVeOl{NN}5
z@r&R5;V=IP)Y5+f6NI1yBRC-lNhm@ShOmSqJQ0XUBq9@qs6-<=F^EYlViSkB#3Mcl
zNJt_QlZ2!sBRMHZNh(s4hP0$3JsHSICNh(StYjlQImk&aa+8O=<Rd=?C`cg+Q-q=v
zqc|lfNhwNGhO(5SJQb)&B`Q;es#K#oHK<7~YEy^0)T2HP2%sU2XiO8D(v0S`pe3zn
zO&i+Mj`nn*Bc13>7rN4o?)0E1z35FJ`qGd73}7IG7|alcGK}GjU?ig$%^1cqj`2)j
zB9oZR6s9tb>C9jzvzW~s<}#1@EMOsvSj-ZZvW(@dU?r<q%^KFSj`eI{Bb(UF7PhjD
z?d)JDyV%Vh_Og%t9N-{_ILr}_a*X4g;3TIw%^A*ej`LjLBA2+#6|QoP>)hZbx46w6
z?sAX&Jm4XZc+3-?@{H%a;3cnk%^TkGj`w`vBcJ%p7rye1@BH8=|M838{NXSE2-M1d
z0uzLw1S2>h2uUbH6Na#aBRmm^NF*W?g{VX$Ix&bzEMgOfxWpqq2}npH5|f0aBqKQ~
zNJ%PElZLdUBRv_&NG39qg{)*FJ2}WnE^?EHyyPQ41t>@%3R8rl6r(sLC`l<wQ--pX
zqdXO;NF^#$g{oAeIyI<CEoxJTy40gS4G5qijc800n$nEsw4f!eXiXd1(vJ3Ypd+2=
zOc%P+jqdcIC%x!RANtad{tRFsgBZ*ZhBA!dj9?_A7|j^QGLG>~U?P*4%oL_Fjp@u_
zCbO8$9Og2Q`7B@|i&)GOma>fHtY9UpSj`&NvX1p^U?ZE@%oet?jqU7UC%f3q9`>@2
z{T$#Rhd9g;j&h9SoZuv<IL#T(a*p#{;3Ai}%oVP3jqBXtCbziF9qw|E`#j(wk9f=z
zp7M<6yx=9Tc+DH$@{ad>;3J>-%oo1$jqm*6C;#z_-~8b({|MCDe*zPPpadg0AqYt*
zLKB9tgd;o=h)5(N6NRWmBRVmNNi1R$hq%NeJ_$%jA`+8?q$DFbDM(2wQj><Xq$52U
z$VetKlZC8gBRe_BNiK4ehrHw?KLsdAAqrE3q7<VzB`8TLN>hfil%qTqs7NI$Q-!Kj
zqdGOHNiAwqhq~0GJ`D(<A&qEE6PnVD=Cq(Ct!Paf+R~2pbf6=h=u8*7(v9x)peMcP
zO&|KwkNyl`AcGjp5QZ|0;f!D;qZrK?#xjoaOkg6Dn9LNWGL7lXU?#Je%^c=3kNGTM
zA&Xed5|*-z<*Z;Ot60q%*0PTEY+xgs*vuBTvW@NRU?;oS%^vo$kNq6rAcr{25sq?<
z<DB3mr#Q_S&T@|PT;L*?xXcx<a*gZU;3l`Y%^mJ?kNZ5}A&+>>6Q1&n=e*!0uXxQH
z-tvz3eBdLW_{<l+@{RBO;3xm_i{Jd=FaHSC#(x46grEc?I3Wm0C_)p4u!JK#5r{}6
zA`^wEL?b#eh)FDB6Nk9OBR&a8NFoxGgrp=RIVng<DpHe%w4@_F8OTT`GLwa@WFtE{
z$Vo18lZU+IBR>TwNFfSSgrXFqI3*}aDN0j@vXrAd6{tuhDpQ53RHHgIs7WnqQ-`|L
zqdpA?pdpQDOcR>YjOMhUC9P;p8`{#2_H>{lo#;##y3&pA^q?ob=uIE`(vSWOU?77S
z%n*h$jNy!6B%>J37{)S=@l0SMlbFmDrZSD`%wQ(7n9UsKGLQKzU?GcG%o3KejODCg
zC97D?8rHIo^=x1xo7l`2wz7@w>|iIm*v%gHvXA{7;2?)M%n^=qjN_c(B&Rsd8P0N!
z^IYH}m$=Lou5yj*+~6j+xXm5za*z8w;31EA%oCpSjOV=IC9inR8{YDc_k7?ZpZLrd
zzVeOl{NN}5@r&R5;V=IP)Yg9j6NI1yBRC-lNhm@ShOmSqJQ0XUBq9@qs6-<=F^EYl
zViSkB#3MclNJt_QlZ2!sBRMHZNh(s4hP0$3JsHSICNh(StYjlQImk&aa+8O=<Rd=?
zC`cg+Q-q=vqc|lfNhwNGhO(5SJQb)&B`Q;es#K#oHK<7~YEy^0)T2HP2%sU2XiO8D
z(v0S`pe3znO&i+Mj`nn*Bc13>7rN4o?)0E1z35FJ`qGd73}7IG7|alcGK}GjU?ig$
z%^1cqj`2)jB9oZR6s9tb>C9jzvzW~s<}#1@EMOsvSj-ZZvW(@dU?r<q%^KFSj`eI{
zBb(UF7PhjD?d)JDyV%Vh_Og%t9N-{_ILr}_a*X4g;3TIw%^A*ej`LjLBA2+#6|QoP
z>)hZbx46w6?sAX&Jm4XZc+3-?@{H%a;3cnk%^TkGj`w`vBcJ%p7rye1@BH8=|M838
z{NXSE2-MDh0uzLw1S2>h2uUbH6Na#aBRmm^NF*W?g{VX$Ix&bzEMgOfxWpqq2}npH
z5|f0aBqKQ~NJ%PElZLdUBRv_&NG39qg{)*FJ2}WnE^?EHyyPQ41t>@%3R8rl6r(sL
zC`l<wQ--pXqdXO;NF^#$g{oAeIyI<CEoxJTy40gS4G5qijc800n$nEsw4f!eXiXd1
z(vJ3Ypd+2=Oc%P+jqdcIC%x!RANtad{tRFsgBZ*ZhBA!dj9?_A7|j^QGLG>~U?P*4
z%oL_Fjp@u_CbO8$9Og2Q`7B@|i&)GOma>fHtY9UpSj`&NvX1p^U?ZE@%oet?jqU7U
zC%f3q9`>@2{T$#Rhd9g;j&h9SoZuv<IL#T(a*p#{;3Ai}%oVP3jqBXtCbziF9qw|E
z`#j(wk9f=zp7M<6yx=9Tc+DH$@{ad>;3J>-%oo1$jqm*6C;#z_-~8b({|MCHe*zPP
zpadg0AqYt*LKB9tgd;o=h)5(N6NRWmBRVmNNi1R$hq%NeJ_$%jA`+8?q$DFbDM(2w
zQj><Xq$52U$VetKlZC8gBRe_BNiK4ehrHw?KLsdAAqrE3q7<VzB`8TLN>hfil%qTq
zs7NI$Q-!KjqdGOHNiAwqhq~0GJ`D(<A&qEE6PnVD=Cq(Ct!Paf+R~2pbf6=h=u8*7
z(v9x)peMcPO&|KwkNyl`AcGjp5QZ|0;f!D;qZrK?#xjoaOkg6Dn9LNWGL7lXU?#Je
z%^c=3kNGTMA&Xed5|*-z<*Z;Ot60q%*0PTEY+xgs*vuBTvW@NRU?;oS%^vo$kNq6r
zAcr{25sq?<<DB3mr#Q_S&T@|PT;L*?xXcx<a*gZU;3l`Y%^mJ?kNZ5}A&+>>6Q1&n
z=e*!0uXxQH-tvz3eBdLW_{<l+@{RBO;3xm_i{Jd=FaHSC!G8i1grEc?I3Wm0C_)p4
zu!JK#5r{}6A`^wEL?b#eh)FDB6Nk9OBR&a8NFoxGgrp=RIVng<DpHe%w4@_F8OTT`
zGLwa@WFtE{$Vo18lZU+IBR>TwNFfSSgrXFqI3*}aDN0j@vXrAd6{tuhDpQ53RHHgI
zs7WnqQ-`|LqdpA?pdpQDOcR>YjOMhUC9P;p8`{#2_H>{lo#;##y3&pA^q?ob=uIE`
z(vSWOU?77S%n*h$jNy!6B%>J37{)S=@l0SMlbFmDrZSD`%wQ(7n9UsKGLQKzU?GcG
z%o3KejODCgC97D?8rHIo^=x1xo7l`2wz7@w>|iIm*v%gHvXA{7;2?)M%n^=qjN_c(
zB&Rsd8P0N!^IYH}m$=Lou5yj*+~6j+xXm5za*z8w;31EA%oCpSjOV=IC9inR8{YDc
z_k7?ZpZLrdzVeOl{NN}5@r&R5;V=IP)X{$e6NI1yBRC-lNhm@ShOmSqJQ0XUBq9@q
zs6-<=F^EYlViSkB#3MclNJt_QlZ2!sBRMHZNh(s4hP0$3JsHSICNh(StYjlQImk&a
za+8O=<Rd=?C`cg+Q-q=vqc|lfNhwNGhO(5SJQb)&B`Q;es#K#oHK<7~YEy^0)T2HP
z2%sU2XiO8D(v0S`pe3znO&i+Mj`nn*Bc13>7rN4o?)0E1z35FJ`qGd73}7IG7|alc
zGK}GjU?ig$%^1cqj`2)jB9oZR6s9tb>C9jzvzW~s<}#1@EMOsvSj-ZZvW(@dU?r<q
z%^KFSj`eI{Bb(UF7PhjD?d)JDyV%Vh_Og%t9N-{_ILr}_a*X4g;3TIw%^A*ej`LjL
zBA2+#6|QoP>)hZbx46w6?sAX&Jm4XZc+3-?@{H%a;3cnk%^TkGj`w`vBcJ%p7rye1
z@BH8=|M838{NXSE2-L}c0uzLw1S2>h2uUbH6Na#aBRmm^NF*W?g{VX$Ix&bzEMgOf
zxWpqq2}npH5|f0aBqKQ~NJ%PElZLdUBRv_&NG39qg{)*FJ2}WnE^?EHyyPQ41t>@%
z3R8rl6r(sLC`l<wQ--pXqdXO;NF^#$g{oAeIyI<CEoxJTy40gS4G5qijc800n$nEs
zw4f!eXiXd1(vJ3Ypd+2=Oc%P+jqdcIC%x!RANtad{tRFsgBZ*ZhBA!dj9?_A7|j^Q
zGLG>~U?P*4%oL_Fjp@u_CbO8$9Og2Q`7B@|i&)GOma>fHtY9UpSj`&NvX1p^U?ZE@
z%oet?jqU7UC%f3q9`>@2{T$#Rhd9g;j&h9SoZuv<IL#T(a*p#{;3Ai}%oVP3jqBXt
zCbziF9qw|E`#j(wk9f=zp7M<6yx=9Tc+DH$@{ad>;3J>-%oo1$jqm*6C;#z_-~8b(
z{|MCCe*zPPpadg0AqYt*LKB9tgd;o=h)5(N6NRWmBRVmNNi1R$hq%NeJ_$%jA`+8?
zq$DFbDM(2wQj><Xq$52U$VetKlZC8gBRe_BNiK4ehrHw?KLsdAAqrE3q7<VzB`8TL
zN>hfil%qTqs7NI$Q-!KjqdGOHNiAwqhq~0GJ`D(<A&qEE6PnVD=Cq(Ct!Paf+R~2p
zbf6=h=u8*7(v9x)peMcPO&|KwkNyl`AcGjp5QZ|0;f!D;qZrK?#xjoaOkg6Dn9LNW
zGL7lXU?#Je%^c=3kNGTMA&Xed5|*-z<*Z;Ot60q%*0PTEY+xgs*vuBTvW@NRU?;oS
z%^vo$kNq6rAcr{25sq?<<DB3mr#Q_S&T@|PT;L*?xXcx<a*gZU;3l`Y%^mJ?kNZ5}
zA&+>>6Q1&n=e*!0uXxQH-tvz3eBdLW_{<l+@{RBO;3xm_i{Jd=FaHSC#eV`5grEc?
zI3Wm0C_)p4u!JK#5r{}6A`^wEL?b#eh)FDB6Nk9OBR&a8NFoxGgrp=RIVng<DpHe%
zw4@_F8OTT`GLwa@WFtE{$Vo18lZU+IBR>TwNFfSSgrXFqI3*}aDN0j@vXrAd6{tuh
zDpQ53RHHgIs7WnqQ-`|LqdpA?pdpQDOcR>YjOMhUC9P;p8`{#2_H>{lo#;##y3&pA
z^q?ob=uIE`(vSWOU?77S%n*h$jNy!6B%>J37{)S=@l0SMlbFmDrZSD`%wQ(7n9UsK
zGLQKzU?GcG%o3KejODCgC97D?8rHIo^=x1xo7l`2wz7@w>|iIm*v%gHvXA{7;2?)M
z%n^=qjN_c(B&Rsd8P0N!^IYH}m$=Lou5yj*+~6j+xXm5za*z8w;31EA%oCpSjOV=I
zC9inR8{YDc_k7?ZpZLrdzVeOl{NN}5@r&R5;V=IP)YX3i6NI1yBRC-lNhm@ShOmSq
zJQ0XUBq9@qs6-<=F^EYlViSkB#3MclNJt_QlZ2!sBRMHZNh(s4hP0$3JsHSICNh(S
ztYjlQImk&aa+8O=<Rd=?C`cg+Q-q=vqc|lfNhwNGhO(5SJQb)&B`Q;es#K#oHK<7~
zYEy^0)T2HP2%sU2XiO8D(v0S`pe3znO&i+Mj`nn*Bc13>7rN4o?)0E1z35FJ`qGd7
z3}7IG7|alcGK}GjU?ig$%^1cqj`2)jB9oZR6s9tb>C9jzvzW~s<}#1@EMOsvSj-ZZ
zvW(@dU?r<q%^KFSj`eI{Bb(UF7PhjD?d)JDyV%Vh_Og%t9N-{_ILr}_a*X4g;3TIw
z%^A*ej`LjLBA2+#6|QoP>)hZbx46w6?sAX&Jm4XZc+3-?@{H%a;3cnk%^TkGj`w`v
zBcJ%p7rye1@BH8=|M838{NXSE2-MAg0uzLw1S2>h2uUbH6Na#aBRmm^NF*W?g{VX$
zIx&bzEMgOfxWpqq2}npH5|f0aBqKQ~NJ%PElZLdUBRv_&NG39qg{)*FJ2}WnE^?EH
zyyPQ41t>@%3R8rl6r(sLC`l<wQ--pXqdXO;NF^#$g{oAeIyI<CEoxJTy40gS4G5qi
zjc800n$nEsw4f!eXiXd1(vJ3Ypd+2=Oc%P+jqdcIC%x!RANtad{tRFsgBZ*ZhBA!d
zj9?_A7|j^QGLG>~U?P*4%oL_Fjp@u_CbO8$9Og2Q`7B@|i&)GOma>fHtY9UpSj`&N
zvX1p^U?ZE@%oet?jqU7UC%f3q9`>@2{T$#Rhd9g;j&h9SoZuv<IL#T(a*p#{;3Ai}
z%oVP3jqBXtCbziF9qw|E`#j(wk9f=zp7M<6yx=9Tc+DH$@{ad>;3J>-%oo1$jqm*6
zC;#z_-~8b({|MCGe*zPPpadg0AqYt*LKB9tgd;o=h)5(N6NRWmBRVmNNi1R$hq%Ne
zJ_$%jA`+8?q$DFbDM(2wQj><Xq$52U$VetKlZC8gBRe_BNiK4ehrHw?KLsdAAqrE3
zq7<VzB`8TLN>hfil%qTqs7NI$Q-!KjqdGOHNiAwqhq~0GJ`D(<A&qEE6PnVD=Cq(C
zt!Paf+R~2pbf6=h=u8*7(v9x)peMcPO&|KwkNyl`AcGjp5QZ|0;f!D;qZrK?#xjoa
zOkg6Dn9LNWGL7lXU?#Je%^c=3kNGTMA&Xed5|*-z<*Z;Ot60q%*0PTEY+xgs*vuBT
zvW@NRU?;oS%^vo$kNq6rAcr{25sq?<<DB3mr#Q_S&T@|PT;L*?xXcx<a*gZU;3l`Y
z%^mJ?kNZ5}A&+>>6Q1&n=e*!0uXxQH-tvz3eBdLW_{<l+@{RBO;3xm_i{Jd=FaHSC
z!+!!3grEc?I3Wm0C_)p4u!JK#5r{}6A`^wEL?b#eh)FDB6Nk9OBR&a8NFoxGgrp=R
zIVng<DpHe%w4@_F8OTT`GLwa@WFtE{$Vo18lZU+IBR>TwNFfSSgrXFqI3*}aDN0j@
zvXrAd6{tuhDpQ53RHHgIs7WnqQ-`|LqdpA?pdpQDOcR>YjOMhUC9P;p8`{#2_H>{l
zo#;##y3&pA^q?ob=uIE`(vSWOU?77S%n*h$jNy!6B%>J37{)S=@l0SMlbFmDrZSD`
z%wQ(7n9UsKGLQKzU?GcG%o3KejODCgC97D?8rHIo^=x1xo7l`2wz7@w>|iIm*v%gH
zvXA{7;2?)M%n^=qjN_c(B&Rsd8P0N!^IYH}m$=Lou5yj*+~6j+xXm5za*z8w;31EA
z%oCpSjOV=IC9inR8{YDc_k7?ZpZLrdzVeOl{NN}5@r&R5;V=IP)YE?g6NI1yBRC-l
zNhm@ShOmSqJQ0XUBq9@qs6-<=F^EYlViSkB#3MclNJt_QlZ2!sBRMHZNh(s4hP0$3
zJsHSICNh(StYjlQImk&aa+8O=<Rd=?C`cg+Q-q=vqc|lfNhwNGhO(5SJQb)&B`Q;e
zs#K#oHK<7~YEy^0)T2HP2%sU2XiO8D(v0S`pe3znO&i+Mj`nn*Bc13>7rN4o?)0E1
zz35FJ`qGd73}7IG7|alcGK}GjU?ig$%^1cqj`2)jB9oZR6s9tb>C9jzvzW~s<}#1@
zEMOsvSj-ZZvW(@dU?r<q%^KFSj`eI{Bb(UF7PhjD?d)JDyV%Vh_Og%t9N-{_ILr}_
za*X4g;3TIw%^A*ej`LjLBA2+#6|QoP>)hZbx46w6?sAX&Jm4XZc+3-?@{H%a;3cnk
z%^TkGj`w`vBcJ%p7rye1@BH8=|M838{NXSE2-M4e0uzLw1S2>h2uUbH6Na#aBRmm^
zNF*W?g{VX$Ix&bzEMgOfxWpqq2}npH5|f0aBqKQ~NJ%PElZLdUBRv_&NG39qg{)*F
zJ2}WnE^?EHyyPQ41t>@%3R8rl6r(sLC`l<wQ--pXqdXO;NF^#$g{oAeIyI<CEoxJT
zy40gS4G5qijc800n$nEsw4f!eXiXd1(vJ3Ypd+2=Oc%P+jqdcIC%x!RANtad{tRFs
zgBZ*ZhBA!dj9?_A7|j^QGLG>~U?P*4%oL_Fjp@u_CbO8$9Og2Q`7B@|i&)GOma>fH
ztY9UpSj`&NvX1p^U?ZE@%oet?jqU7UC%f3q9`>@2{T$#Rhd9g;j&h9SoZuv<IL#T(
za*p#{;3Ai}%oVP3jqBXtCbziF9qw|E`#j(wk9f=zp7M<6yx=9Tc+DH$@{ad>;3J>-
z%oo1$jqm*6C;#z_-~8b({|MCEe*zPPpadg0AqYt*LKB9tgd;o=h)5(N6NRWmBRVmN
zNi1R$hq%NeJ_$%jA`+8?q$DFbDM(2wQj><Xq$52U$VetKlZC8gBRe_BNiK4ehrHw?
zKLsdAAqrE3q7<VzB`8TLN>hfil%qTqs7NI$Q-!KjqdGOHNiAwqhq~0GJ`D(<A&qEE
z6PnVD=Cq(Ct!Paf+R~2pbf6=h=u8*7(v9x)peMcPO&|KwkNyl`AcGjp5QZ|0;f!D;
zqZrK?#xjoaOkg6Dn9LNWGL7lXU?#Je%^c=3kNGTMA&Xed5|*-z<*Z;Ot60q%*0PTE
zY+xgs*vuBTvW@NRU?;oS%^vo$kNq6rAcr{25sq?<<DB3mr#Q_S&T@|PT;L*?xXcx<
za*gZU;3l`Y%^mJ?kNZ5}A&+>>6Q1&n=e*!0uXxQH-tvz3eBdLW_{<l+@{RBO;3xm_
zi{Jd=FaHSC$A1D7grEc?I3Wm0C_)p4u!JK#5r{}6A`^wEL?b#eh)FDB6Nk9OBR&a8
zNFoxGgrp=RIVng<DpHe%w4@_F8OTT`GLwa@WFtE{$Vo18lZU+IBR>TwNFfSSgrXFq
zI3*}aDN0j@vXrAd6{tuhDpQ53RHHgIs7WnqQ-`|LqdpA?pdpQDOcR>YjOMhUC9P;p
z8`{#2_H>{lo#;##y3&pA^q?ob=uIE`(vSWOU?77S%n*h$jNy!6B%>J37{)S=@l0SM
zlbFmDrZSD`%wQ(7n9UsKGLQKzU?GcG%o3KejODCgC97D?8rHIo^=x1xo7l`2wz7@w
z>|iIm*v%gHvXA{7;2?)M%n^=qjN_c(B&Rsd8P0N!^IYH}m$=Lou5yj*+~6j+xXm5z
za*z8w;31EA%oCpSjOV=IC9inR8{YDc_k7?ZpZLrdzVeOl{NN}5@r&R5;V=IP)YpFk
z6NI1yBRC-lNhm@ShOmSqJQ0XUBq9@qs6-<=F^EYlViSkB#3MclNJt_QlZ2!sBRMHZ
zNh(s4hP0$3JsHSICNh(StYjlQImk&aa+8O=<Rd=?C`cg+Q-q=vqc|lfNhwNGhO(5S
zJQb)&B`Q;es#K#oHK<7~YEy^0)T2HP2%sU2XiO8D(v0S`pe3znO&i+Mj`nn*Bc13>
z7rN4o?)0E1z35FJ`qGd73}7IG7|alcGK}GjU?ig$%^1cqj`2)jB9oZR6s9tb>C9jz
zvzW~s<}#1@EMOsvSj-ZZvW(@dU?r<q%^KFSj`eI{Bb(UF7PhjD?d)JDyV%Vh_Og%t
z9N-{_ILr}_a*X4g;3TIw%^A*ej`LjLBA2+#6|QoP>)hZbx46w6?sAX&Jm4XZc+3-?
z@{H%a;3cnk%^TkGj`w`vBcJ%p7rye1@BH8=|M838{NXSE2-MGi0uzLw1S2>h2uUbH
z6Na#aBRmm^NF*W?g{VX$Ix&bzEMgOfxWpqq2}npH5|f0aBqKQ~NJ%PElZLdUBRv_&
zNG39qg{)*FJ2}WnE^?EHyyPQ41t>@%3R8rl6r(sLC`l<wQ--pXqdXO;NF^#$g{oAe
zIyI<CEoxJTy40gS4G5qijc800n$nEsw4f!eXiXd1(vJ3Ypd+2=Oc%P+jqdcIC%x!R
zANtad{tRFsgBZ*ZhBA!dj9?_A7|j^QGLG>~U?P*4%oL_Fjp@u_CbO8$9Og2Q`7B@|
zi&)GOma>fHtY9UpSj`&NvX1p^U?ZE@%oet?jqU7UC%f3q9`>@2{T$#Rhd9g;j&h9S
zoZuv<IL#T(a*p#{;3Ai}%oVP3jqBXtCbziF9qw|E`#j(wk9f=zp7M<6yx=9Tc+DH$
z@{ad>;3J>-%oo1$jqm*6C;#z_-~8b({|MCIe*zPPpadg0AqYt*LKB9tgd;o=h)5(N
z6NRWmBRVmNNi1R$hq%NeJ_$%jA`+8?q$DFbDM(2wQj><Xq$52U$VetKlZC8gBRe_B
zNiK4ehrHw?KLsdAAqrE3q7<VzB`8TLN>hfil%qTqs7NI$Q-!KjqdGOHNiAwqhq~0G
zJ`D(<A&qEE6PnVD=Cq(Ct!Paf+R~2pbf6=h=u8*7(v9x)peMcPO&|KwkNyl`AcGjp
z5QZ|0;f!D;qZrK?#xjoaOkg6Dn9LNWGL7lXU?#Je%^c=3kNGTMA&Xed5|*-z<*Z;O
zt60q%*0PTEY+xgs*vuBTvW@NRU?;oS%^vo$kNq6rAcr{25sq?<<DB3mr#Q_S&T@|P
zT;L*?xXcx<a*gZU;3l`Y%^mJ?kNZ5}A&+>>6Q1&n=e*!0uXxQH-tvz3eBdLW_{<l+
z@{RBO;3xmHQx8jkXjv2mtIM`++qP}nwr$(CZQHhO+pf+Jd4n9>;oTAI{NxwE`NLoS
z5um^R2}mFU6NI1yBRC-lNhm@ShOmSqJQ0XUBq9@qs6-<=F^EYlViSkB#3MclNJt_Q
zlZ2!sBRMHZNh(s4hP0$3JsHSICNh(StYjlQImk&aa+8O=<Rd=?C`cg+Q-q=vqc|lf
zNhwNGhO(5SJQb)&B`Q;es#K#oHK<7~YEy^0)T2HPXh<U((}bopqd6^TNh?~@hPJe$
zJss#sCpy!Gu5_b2J?Kd<deeu#^rJrm7|0+7GlZcGV>lxi$tXrMhOvxeJQJA6BqlS3
zsZ3)!GnmONW;2Jm%ws+aSjZw4vxKEAV>v5W$tqT}hPA9?Jsa4_CN{H$t!!gEJJ`uC
zcC&}Q>|;L%ILILmbA+QD<2WZc$tg~AhO?aGJQujgB`$M?t6bwcH@L|yZgYpb+~YnE
zc*r9j^Mt27<2f&Q$tzy-hPS-qJs<eUCqDCquYBV>KlsTne)EUF{3E~s{S%Nt1SSYU
z2}W>25Ry=YCJbQ-M|dI-kw`=)3Q>thbYc*bSi~j{afwHK5|EHYBqj+-Nk(!~kdjoS
zCJkvxM|v`lkxXPJ3t7oVc5;xDT;wJXdC5n93Q&+j6s8D8DMoQhP?A!VrVM2%M|mnx
zkxEpi3RS5_b!t$PTGXZvb*V>v8qknNG^PnnX-0Ee(2`cPrVVXrM|(QZkxq1`3tj0(
zcY4s1Ui799ed$Mk1~8C83}y&J8OCr%Fp^P>W(;E)$9N_%kx5Ku3R9WJbY?J<S<Ge*
zbD76{7O;>-EM^HyS;lf!u##1*W({ju$9gufkxgu73tQR7c6P9nUF>ELd)dc+4seh|
z9Oei|ImU5LaFSD;<_u>!$9XPrkxN|W3Rk(tb#8EzTioUjce%%X9`KMyJmv{cdB$^I
z@RC=&<_&Lo$9q2TkxzW)3t#!hcYg4bU;O3|fB8p%f%+#Pfe1_xf)b42gdilL2u&Em
z5{~dhAR>{7OcbILjp)Q6Cb5W39O4p>_#_}9iAYQml9G(%q#z}!NKG2jl8*FbAS0Q`
zOct_|jqKzgC%MQ?9`cfp{1l)dg(yrBic*Z?l%OP~C`}p4QjYRepdyv1Ockn9jq22(
zCbg(d9qLk#`ZS;+jc800n$nEsw4f!eXiXd1(vJ3Ypd+2=Oc%P+jqdcIC%x!RANtad
z{tRFsgBZ*ZhBA!dj9?_A7|j^QGLG>~U?P*4%oL_Fjp@u_CbO8$9Og2Q`7B@|i&)GO
zma>fHtY9UpSj`&NvX1p^U?ZE@%oet?jqU7UC%f3q9`>@2{T$#Rhd9g;j&h9SoZuv<
zIL#T(a*p#{;3Ai}%oVP3jqBXtCbziF9qw|E`#j(wk9f=zp7M<6yx=9Tc+DH$@{ad>
z;3J>-%oo1$jqm*6C%^d3AO7->0E6^TKmrk%AOs~C!3jY~LJ^uUge4r|i9kdm5t%4N
zB^uF*K}=#1n>fTJ9`Q*)LK2afBqSvn$w@&<QjwZ8q$M5c$v{Rjk(n%HB^%kvK~8d!
zn>^$tANeUjK?+frA{3<<#VJ8aN>Q3Jl%*WysX#?4QJE@Kr5e?#K}~8=n>y5`9`$KJ
zLmJVTCN!lP&1pePTG5&|w51*G=|D$1(U~rEr5oMpK~H+on?CfVAN?7?Kn5|GAq-_0
z!x_OyMlqT(jAb0-nZQIQF_|e$Wg63&!Axc`n>oy79`jkiLKd-@B`jqb%UQunR<W8j
ztYsbR*}z6Nv6(GwWgFYs!A^Fun?3AhANx7LK@M@4BOK)z$2q}CPH~zuoaG$nxxhs(
zahWSz<r>$y!A)*)n>*a)9`|{`Lmu&%Cp_gD&w0U1Uh$eYyyYG5`M^g$@tH4t<s0Am
z!B2kin?L;J9{~pIpMV4+FhK}PFoF|;kc1*MVF*h&!V`grL?SX#h)Oh~6N8w<A~tb|
zOFZI}fP^F>F-b^DGLn;ml%ygxX-G>t(vyLVWFj+J$VxV{lY^Y(A~$)+OFr^bfPxgF
zFhwXzF^W@yl9Zw}WhhHI%2R=gRH8Cfs7f`eQ-hk+qBeD?OFin-fQB@pF->SnGn&(a
zmb9WZZD>n7+S7rKbfPm|=t?)b(}SM$qBni$OF#NEfPoBRFhdy1ForXNk&I$AV;IXg
z#xsG5Oky%qn94M!GlQATVm5P_%RJ_@fQ2k#F-us=GM2M~m8@blYgo%V*0X_)Y+^H8
z*vdAxvxA-NVmEu(%RcsVfP)<3Fh@AbF^+SBlbqr-XE@6_&U1l_T;eiUxXLxIbAy}Q
z;x>1<%RTP%fQLNdF;95PGoJH;m%QRNZ+Oc)-t&QveBv`-_{ulF^MjxK;x~Wz%Rd4P
z(LVtRL|}ptlwbrW1R)7UXu=SdaD*oU5s5@(q7ap6L?;F@iA8MU5SMtwCjkjbL}HSV
zlw>3)1u02IYSNIFbfhN(8OcOuvXGT*WG4qX$whARke7VqrvL>hL}7|hlwuU81SKg&
zY06NRa+Ie66{$pJs!)|`RHp_tsYPw-P?vhtrvVLVL}QxJlx8%i1ubbsYueD3cC@Dh
z9qB}8y3mzwbf*VB=|yk)(3gJnX8;2k#9)Rnlwk~K1S1*6XvQ#>ag1jI6Pd(hrZAOh
zOlJl&nZ<18Fqe7EX8{XY#A24Plw~Yu1uI#_YSyrpb*yIt8`;EWwy>3LY-a~M*~M=5
zu$O)8=Ku#e#9@wblw%y{1SdJgY0hw#bDZY_7rDe`u5guWT;~Qixy5bnaF=`B=K&9S
z#ABZDlxIBW1uuEUYu@mdcf98VANj;*zVMZAeCG#0`NePk@RxrC7^;5)5{SSAAt=EJ
zP6$F0iqM21Ea3=G1R@fN$V4G3(TGkAViJqk#33&6h))6%l8D44At}j7P6|?ziqxbb
zE$K*41~QU~%w!=e*~m@~a*~VO<RLHl$WH+ZQi#G7p(w>DP6<j<iqe#!EafOq1u9aB
z%2c5$)u>JlYEp~Z)S)i*s80hL(ul@1p()L1P77Mniq^EDE$wJe2RhP;&UB$G-RMpa
zdeV#D^r0{P=+6KKGKj$pVJO2G&Im>_iqVW=EaMo@1ST?x$xLA?)0oZ-W-^P}%waC`
zn9l+hvWUejVJXX4&I(qtiq))PE$dj%1~#&Z&1_*S+t|(ycCw4z>|rna*v|nDa)`qm
z;V8#A&IwL(iqo9oEay1S1uk-l%Ut0q*SO9NZgPv;+~F?wxX%L~@`%Sg;VI8}&I?}h
zir2j1E$?{G2R`zN&wSx4-}ufCe)5ao{NXSE2rx|l1SAlF2|`eU5u6Z&Bov_uLs-HQ
zo(M!F5|N2QRH6}`7{nwNv57-m;t`(&BqR}uNkUSRk(?ByBo(PiLt4_2o(yCp6Pd|E
zR<e<u9ONVyxyeIb@{ykc6r>P^DMC?-QJfN#q!gtoLs`mEo(fc?5|yb!RjN^)8q}l~
zwW&j0>QSEtG^7!YX+l$)(VP~vq!q1cLtEO>o(^=R6P@WoSGv)i9`vLaz3D?=`q7^O
z3}g_48NyJ8F`N;MWE7(r!&t^Ko(W835|f$2RHiYV8O&rBvzfzO<}sfIEMyUjS;A75
zv78mGWEHDf!&=s{o(*hd6Pww>R<^O79qeQmyV=8D_OYJ>9OMv(Il@tnahwyJ<P@hl
z!&%O8o(o*$5|_EcRjzTJ8{Fg;x4FYz?s1<7Jme9NdBRhk@thaD<Q1=Z!&~0*o)3KF
z6QB9QSHAI`AN=GOzxl&o{t;lf{s~AR0uzLw1S2>h2uUbH6Na#aBRmm^NF*W?g{VX$
zIx&bzEMgOfxWpqq2}npH5|f0aBqKQ~NJ%PElZLdUBRv_&NG39qg{)*FJ2}WnE^?EH
zyyPQ41t>@%3R8rl6r(sLC`l<wQ--pXqdXO;NF^#$g{oAeIyI<CEoxJTy40gS4QNOs
z8q<WPG^05!Xh|zt(}uRRqdgtyNGCeeg|2j?J3Z)0FM895zVxF%0~p941~Y`A3}ZMW
z7|AF`GlsE@V>}a>$Rs8+g{e$qIy0EbEM_x@xy)le3s}e^7PEw<EMqw<Sjj3@vxc>-
zV?7(#$R;+kg{^F3J3H9PE_Snrz3gK@2RO(f4s(Q~9OF1AILRqabB42=<2)C*$R#dw
zg{xfSIybn<EpBs%yWHbG4|vEU9`l5!JmWbpc*!eX^M<#)<2@hv$R|GYg|B?$J3siz
zFMjifzx*S>2>la~Km;ZTK?z21LJ*QrgeDAO2}gJ$5RphkCJIrBMs#8jlUT$i4snS`
zd=ik5L?k8&Nl8X>Qjn5Vq$Uk%Nk@7zkdaJeCJR}~Ms{+LlU(E`4|&N)ehN^KLKLP5
zMJYycN>Gwgl%@=2DMxuKP?1VhrV3T5Ms;dXlUmfK4t1$VeHze^Ml_}gO=(7RTF{bK
zw5APhX-9iH(2-7brVCx^Mt6G9lV0?u4}IxJe+Dp+K@4UHLm9?!Mlh05jAjgD8OL}g
zFp)`2W(rf8#&l*dlUdAW4s)5ud={{fMJ#3sOIgNpR<M#)tY!^sS;u-du#rt{W(!-{
z#&&kFlU?j)4}00iehzSuLmcJ^M>)oEPH>V_oaPK?ImdY}aFI(~<_cH2#&vFRlUv;8
z4tKf7eID?TM?B^UPkF|3UhtAvyygvWdB=M`@R3h^<_ll>#&>@3lVAMi4}bYbfRXwq
zAb|)>5P}kn;DjI~p$JVF!V-?~L?9xOh)fis5{>A@ASSVhO&sD9kN6}YA&E##5|WaP
z<fI@asYp#4(vpt!WFRA%$V?Wpl8x--ASb!VO&;=+kNgy%AcZJQ5sFfb;*_8yr6^4q
z%2JN<RG=c2s7w{AQjO}=peD7bO&#h|kNPyAA&qEE6PnVD=Cq(Ct!Paf+R~2pbf6=h
z=u8*7(v9x)peMcPO&|KwkNyl`AcGjp5QZ|0;f!D;qZrK?#xjoaOkg6Dn9LNWGL7lX
zU?#Je%^c=3kNGTMA&Xed5|*-z<*Z;Ot60q%*0PTEY+xgs*vuBTvW@NRU?;oS%^vo$
zkNq6rAcr{25sq?<<DB3mr#Q_S&T@|PT;L*?xXcx<a*gZU;3l`Y%^mJ?kNZ5}A&+>>
z6Q1&n=e*!0uXxQH-tvz3eBdLW_{<l+@{RBO;3vQM%^&{qj{u|ePe1|@m>>it7{Lia
zNJ0^sFoY!>;fX**A`zJ=L?s&0i9t+a5t}%~B_8ofKtd9cm?R`68OcdON>Y)UG^8aR
z>B&GwGLe}qWF;Hf$w5wXk()f^B_H`IKtT#om?9LV7{w_;NlH<gGL)qp<*7hLDp8p#
zRHYi#sX<L@QJXr{r5^QZKtmeQm?ku(8O>=yOIp#IHngQ3?dd>AI?<Ufbfp{J=|N9=
z(VIT>r62tnz(58um>~>h7{eLCNJcT5F^pv#<C(xjCNY^QOl2C=nZZnEF`GHeWghcc
zz(N+Wm?bP_8OvG0N>;I&HLPVF>)F6YHnEv4Y-JnU*}+bBv70^YWgq)Fz(Edim?IqJ
z7{@umNltN^Go0ld=efW|E^(PFT;&?qxxr0tahp5b<sSEWz(XGKm?u2t8P9paOJ4Ds
zH@xK?@A<$-KJl3^eB~S8`N2<q@tZ&V<sSh?>z{xGA}~P+N-%;Gf{=tFG+_u!IKmTw
zh(sbXQHV-3q7#Fd#3D9vh)X=;lYoRIA~8uwN-~m@f|R5pHEBpoI?|JYjASA+S;$H@
zvXg_H<RUkD$V)!*Q-FdLqA*1$N->I4f|8V?G-W7DIm%Okid3R9Rj5ies#AlS)S@<Z
zs7pQS(}0FFqA^WqN;8_%f|j(RHEn21JKEEMj&!0kUFb?Ty3>Q6^rAO?=u1EPGk}2%
zVlYD($}omAf{~13G-DXcIL0%9iA-WLQ<%y$rZa<?%wjfkn9Drovw(#xVlhit$}*O-
zf|aadHEUSQI@Ys+jcj5wTiD7rwzGqs>|!^2*vmflbAW>!;xI=z$}x^}f|H!$G-o)=
zInHx|i(KL|SGdYGu5*K%+~PKOxXV56^MHpu;xSKn$}^txf|tDFHE(#!JKpnwk9^`Y
zU--&5zVm~h{Ngu%_{%>6jL|;<2}EFm5R_m9Cj=o0MQFkhmT-h80uhNsWTFt2XhbIl
zF^NTN;t-d3#3um>Nkn3jkd$O3Cj}`<MQYNJmUN^i0~yIgX0ni#Y-A?~ImtzC@{pH&
z<fi}yDMVq4P?Ta6rvxP_MQO@VmU5J*0u`x5WvWn>YE-8NHK|2y>QI+@)TaRrX+&e1
z(3EC0rv)u(MQhs7mUgtK10Cr^XS&dpZgi&yJ?TYn`p}nt^k)DA8N^_QFqB~oX9Ob|
z#c0MbmT`<{0u!0UWTr5cX-sDZGnvI~<}jCe%x3`$S;S(Nu#{yiX9X)+#cI~DmUXOW
z0~^`IX11`EZER-;JK4o<_OO?I?B@UnImBU(aFk;l=L9D?#c9rPmUEov0vEZ&Wv+0Q
zYh33BH@U@a?r@iT+~)xgdBkI$@RVmf=LIi$#cSU1mUq1810VUsXTI>2Z+zzmKl#OP
z{_vN71Q@G-0uqS81R*HF2u=t>5{l4-AuQntPXr<oiO57DD$$5e3}O<C*u)_&@rX|X
z5|W6-Bq1ruNKOh;l8V%%AuZ`hPX;oQiOggnE7{0S4sw!<+~grI`N&TJ3Q~x|6rm`^
zC{77VQi{@)p)BPnPX#JciON)=D%Ge?4Qf)0+SH*g^{7t+8q$cyG@&WYXif`S(u&r!
zp)KubPX{{EiOzJPE8XZ$4|>vz-t?g_{pimC1~Q1j3}Gn47|sYrGK$fRVJzbq&jcni
ziOEc1D$|(G3}!Nm+00=s^O(;97P5%NEMY0jSk4MovWnHLVJ+)e&jvQKiOp<bE8E!4
z4tBDO-Rxm6``FI`4swXY9N{R(IL--9a*ETO;VkDk&jl`WiOXE!D%ZHq4Q_Ia+uY$U
z_qfjk9`cCCJmD$Nc+Lx6@`~5I;VtiY&j&v8iO+oDE8qCe4}S8C-~8b({|GQn{{$ou
zfeAuTf)Sh$gd`N92}4-I5uOM{BodK{LR6v=ofyO<7O{y#T;dU*1SBL8iAh3Il98Mg
zq$CxoNkdxFk)8}>Bomp*LRPYoogCyO7rDtpUh<Kj0u-bWg(*T&icy>rl%y1;DMMMx
zQJxA^q!N{>LRG3!of_1n7PYBEUFuPv1~jA*jcGztn$esVw4@cSX+vAu(Vh-;q!XR#
zLRY%cogVb07rp62U;5FX0SsgigBik5hB2HGjARs}8N*n{F`fxbWD=8^!c?X)of*tz
z7PFbdT;?&K1uSF{i&?@_ma&`_tYj6dS;Jb^v7QZVWD}d&!dAAiogM6C7rWWRUiPt{
z103WKhdIJgj&Yn5oa7XzIm21bah?lY<Pw*;!d0$uog3Wb7Pq;>UG8z82R!5vk9opV
zp7ER)yyO+HdBa=Y@tzNS<P)Fy!dJfWoge(<7r*(#U;Ytby#5JDAOaJFpadg0AqYt*
zLKB9tgd;o=h)5(N6NRWmBRVmNNi1R$hq%NeJ_$%jA`+8?q$DFbDM(2wQj><Xq$52U
z$VetKlZC8gBRe_BNiK4ehrHw?KLsdAAqrE3q7<VzB`8TLN>hfil%qTqs7NI$Q-!Kj
zqdGOHNiAwqhq~0GJ`HF{BO23$rZl5DEoezATGNKMw4*&8=tw6z(}k{dqdPt5NiTZS
zhraZqKLZ%ZAO<spp$ua<BN)jjMl*)7jAJ|#n8+k1Gli*4V>&aK$t-3whq=sSJ_}gL
zA{MiRr7UAPD_F@YR<nk+tYbYJ*vKX}vxTi}V>>(8$u4%YhrR4$KL<F-Ar5ndqa5Qn
zCpgI|PIHE{oZ~zfxX2|gbA_v1<2pCE$t`Ykhr8V4J`Z@vBOddFr#$01FL=o-Uh{^x
zyyHC|_{b+d^M$W`<2yh2$uEBMhrj$Izy$phkU#_`2tf%(a6%B0P=qE7VF^cgA`p>C
zL?#MRiAHo{5R+KMCJu3lM|={HkVGUV2}wyta#E0zRHP;iX-P+VGLVr>WF`w)$wqc^
zkds{GCJ%YZM}7)WkU|uu2t_GIaY|5<Qk13)WhqB_Do~M1RHh15sYZ2bP?K8JrVe$f
zM|~R5kVZ772~BB6b6U`nR<x!KZD~h)I?$0$bfybk=|*>Y(34*DrVoATM}Gz|kU<P)
z2tygha7HkaQH*8`V;RSICNPmnOlAsGnZ|TxFq2u#W)5?i$9xvBkVPzJ2}@bVa#paC
zRjg(WYgxy7Hn5RRY-S5v*~WHuu#;WvW)FMW$9@iQkV72i2uC@_aZYfOQ=H}uXF11t
zE^v`cT;>W_xyE&FaFbiy<_>qc$9*2~kVib`2~T;(b6)V0SG?v8Z+XXiKJbxGeC7*Z
z`NnsC@RMKs<_~}QM}Ud?Cm?|cOb~(+jNpVIB%ugR7{U^c@I)XYk%&wbq7seh#2_ZI
zh)o>g5|8*KAR&oJOcIikjO3&sC8<bF8q$)E^kg6-naE5QvXYJL<RB-x$W0#dl8^ip
zpdf`POc9DwjN+7_B&8@#8Ol<Q@>HNAm8eV=s#1;W)SxD{s7)Q}QjhvHpdpQDOcR>Y
zjOMhUC9P;p8`{#2_H>{lo#;##y3&pA^q?ob=uIE`(vSWOU?77S%n*h$jNy!6B%>J3
z7{)S=@l0SMlbFmDrZSD`%wQ(7n9UsKGLQKzU?GcG%o3KejODCgC97D?8rHIo^=x1x
zo7l`2wz7@w>|iIm*v%gHvXA{7;2?)M%n^=qjN_c(B&Rsd8P0N!^IYH}m$=Lou5yj*
z+~6j+xXm5za*z8w;31EA%oCpSjOV=IC9inR8{YDc_k7?ZpZLrdzVeOl{NN|Q_{|^w
z@{a(M^iMzn5ttwZB^bd8K}bRonlOYV9N~#TL?RKHC`2V1(TPD!ViB7-#3df_NkBpp
zk(eYTB^k*{K}u4Qnlz*(9qGwHMlz9^EMz4c*~vjpa*>-n<Ru^ZDL_FAQJ5kWr5MF2
zK}kwcnlhB79ObD%MJiF5DpaK!)u};EYEhdy)TJKvX+T37(U>MQr5Vj>K}%ZEnl`kh
z9qs8rM>^4&E_9_E-RVJ3deNIc^ravD8NfgWF_<9?Wf;R5!AM3inlX%J9OIe5L?$tr
zDNJP=)0x3cW-*&N%w-<)S-?UTv6v++Wf{v^!Ae%Knl-Ft9qZY^MmDjTEo@~Q+u6ZR
zcCnj1>}4POIlw^<ahM|<<rv2~!AVYWnlqf`9Ot>fMJ{ofD_rFo*SWz>ZgHDC+~pqk
zdB8&+@t7w(<r&X;!AoB8nm4@V9q;+TM?UeHFMQ=2-}%8$e({?>{N*13ChMPo1R^j&
z2ud)56M~S0A~azLOE|(4frvyRGEs<1G@=uOn8YGBafnMi;*)@cBqA|MNJ=u2lY*3_
zA~k79OFGh%fsAA#Gg-(=HnNk0oa7=mdB{sX@>76<6rwOiC`vJkQ-YF|qBLbFOF7C@
zfr?b3GF7NbHL6pCn$)5;b*M`{>eGORG@>z0Xi77h(}I??qBU)3OFP=rfsS;dGhOIP
zH@ee<p7f$OedtR+`ZIum3}P@t7|Jk)GlG$fVl-nI%Q(g}fr(6FGE<n!G^R6ynapA~
zbC}CK=Cgo>EMhTBSjsY%vx1eZVl``6%R1JxfsJfpGh5ioHny{ao$O*ad)Ui9_H%%P
z9O5uXILa}ObApqc;xuPC%Q?<-fs0(?GFQ0DHLi1mo800yceu+v?(=|$JmN7=c*--L
z^MaSW;x%u0%RAolfscIRGhg`1H@@?OpZwxCfB4Hk0!-0A0SQE4f)JEo1SbR`2}Nka
z5SDO+Cjt?PL}a26m1smK1~G|6Y~m1?c*G|G2}wj^l8}^SBqs$aNkwYXkd}0$Cj%MD
zL}s#(m26}u2RX?_Zt{?qeB`G91t~;ficpkd6sH6wDMe|@P?mC(rveqJL}jW_m1<O{
z1~sWgZR${$deo-@4QWJUn$VPHG^YhEX+>+=(3W<zrvn}7L}$9tm2PyW2R-RUZ~D-e
ze)MMm0~y3%hA@<23}*x*8O3PEFqUzQX95$M#AK#0m1#_81~Zw(Z00bRdCX@43t7Zs
zmavp%EN2BPS;cDBu$FbKX9FAA#Addzm2GTi2RqrtZuYR3eeCA|2RX!Hj&PJ?9Oncl
zImKztaF%nN=K>eG#AU8<m1|t*1~<9IZSHWFd)(&%4|&96p74}sJm&>3dBtnq@RoPH
z=K~-4#Am+nm2Z6K2S546Z~pL?e*~DSe*zMSzyu*E!3a(WLK2G5gdr^92u}ne5{bw}
zAu7>`P7Goai`c{=F7b#@0uqvl#3Ugp$w*ELQj&_)q#-ToNKXbbl8MY@AuHL)P7ZRC
zi`?WPFZsw%0SZ!x!W5w>#VAe*N>Yl_l%Xu;C{G0{Qi;k`p(@p=P7P{Oi`vwoF7>ES
z0~*qZ#x$WR&1g;wTGEQvw4p8SXio<^(uvM=p)1|!P7iw0i{A91Fa7Ax00uIM!3<$2
z!x+v8Mly=gjA1O}7|#SIGKtAdVJg#@&J1QUi`mR!F7uer0v57}#Vlbd%UI3|R<erK
ztYIzdSkDGFvWd-XVJq9%&JK36i{0#DFZ<Zf0S<DA!yMr#$2iUjPI8LVoZ&3zIL`$x
za*4}a;VRd-&JAvIi`(4cF88?410M2-$2{RF&v?!YUh<09yx}eHc+Uqu@`=xU;Va+x
z&JTX_i{Jd=FaHQIP5%TW5P=CoP=XPh5QHQYp$S7+!V#VbL?jZCi9%GO5uF&sBo?uW
zLtNq!p9CZ%5s67cQj(FJ6r>~-sYydx(vhAFWF!-r$wF4Lk)0gmBp12KLtgTcp8^!5
z5QQm1QHoKV5|pGAr71&M%2A#QRHPD>sX|q%QJospq!zWQLtW}op9VCf5shg=Q<~A7
z7PO=lt!YDB+R>g4bfgoV=|We!(VZUjq!+#ELtpyQp8*VH5Q7=QP=+y_5sYLMqZz|k
z#xb4=Ok@(1nZi`2F`XIAWEQiT!(8Sup9L&r5sO*EQkJot6|7_xt69TZ*0G)qY-AIg
z*}_(~v7H_4WEZ>H!(R5Wp937^5QjO!QI2t(6P)A}r#Zt}&T*a#T;vj$xx!Vhah)67
z<QBKN!(Hxip9ehT5s!JoQ=ajh7rf*ZuX)2;-tnFfeB=|K`NCJe@tq(1<QKpB!(aXp
zV7mSZNFV|egrEc?I3Wm0C_)p4u!JK#5r{}6A`^wEL?b#eh)FDB6Nk9OBR&a8NFoxG
zgrp=RIVng<DpHe%w4@_F8OTT`GLwa@WFtE{$Vo18lZU+IBR>TwNFfSSgrXFqI3*}a
zDN0j@vXrAd6{tuhDpQ53RHHgIs7WnqQ-`|LqdpC2NFy54gr+p3IW1^OD_YZrwzQ)?
z9q33WI@5)&bfY^x=t(bn(}%wFqdx-}$RGwYgrN*$I3pOzC`L1ev5aFp6PU;(CNqVp
zOk+ATn8_?=Gl#j%V?GO5$RZZAgrzKFIV)JnDps?GwX9=38`#JuHnWATY-2k+*vT$-
zvxmLxV?PHt$RQ4MgrgkeI43yCDNb{Svz+5R7r4kJE^~#eT;n=7xXCSUbBDX!<30~~
z$Ri%}gr_{?IWKt0D_--4x4h#$ANa^8KJ$gIeB(Pm_{lGR^M}9uBft#(6OcdzCI~?Z
zMsPw9l2C*u3}FdJcp?yyNJJ(IQHe%$Vi1#9#3l}LiAQ`AkdQ<qCJ9MNMsiY+l2oK7
z4QWY7dNPoaOk^etS;<Cra*&f;<R%Y!$wz()P>@0trU*qTMsZ3|l2VkW3}q=tc`8tm
zN>ru_RjEdGYEY9})TRz~sYiVp(2zznrU^}HMsr%wl2){)4Q**hdpgjOPIRUVUFk-5
zdeD<z^rjDe=|_JCFpxnEW(Y$W#&AY3l2MFi3}YF`cqTBBNla!6Q<=teW-yak%w`UA
zna6w<u#iP8W(iAK#&TA$l2xo`4QpA)dN#0;O>AZhTiM2TcCeFO>}C&p*~fkkaF9bB
z<_JeQ#&J$?l2e@K3}-pVc`k5~OI+p(SGmS@Zg7)Z+~y8<xyOAT@Q_D5<_S-E#&cfq
zl2^Ru4R3kJdp_`yPkiPJU-`y&e(;lD{N@jT`A2}6`X?ZP2uu)y5{%%4AS9s(O&G!w
zj_^bvB9Vwp6rvK1=)@o<v4~9^;u4SeBp@M)NK6uvl8oe}ASJ0tO&ZdYj`U<8Bbmrd
z7P69!?BpOPxyVf(@{*7I6rdo5C`=KGQjFr1pd_UzO&Q8kj`CEXB9*926{=E=>eQen
zwWv)U>QayTG@v1kXiO8D(v0S`pe3znO&i+Mj`nn*Bc13>7rN4o?)0E1z35FJ`qGd7
z3}7IG7|alcGK}GjU?ig$%^1cqj`2)jB9oZR6s9tb>C9jzvzW~s<}#1@EMOsvSj-ZZ
zvW(@dU?r<q%^KFSj`eI{Bb(UF7PhjD?d)JDyV%Vh_Og%t9N-{_ILr}_a*X4g;3TIw
z%^A*ej`LjLBA2+#6|QoP>)hZbx46w6?sAX&Jm4XZc+3-?@{H%a;3cnk%^TkGj`w`v
zBcJ%p7rye1@BH8=zxd4`{_>9iv-D3u0uh)X1SJ^32|-9g5t=ZBB^=?2Ktv)DnJ7dh
z8qtYCOkxq6IK(9$@ku~J5|NlBBqbTiNkK|dk(xB5B^~L>Kt?i=nJi=_8`;T0PI8f(
zJme)G`6)m_3Q?FM6r~u&DM3j}QJON8r5xp{Kt(E1nJQGJ8r7*mO=?k_I@F~e^=Uvu
z8qt_0G^H8MX+cX`(V8~2r5)|*Ku0>!nJ#pt8{O$aPkPatKJ=v@{TaYO1~Hf+3}qO@
z8No<KF`6-qWgO#~z(gi7nJG+V8q=A<OlC2gIm~4q^I5<`7O|KmEM*zXS;0zHv6?lk
zWgY9;z(zK)nJsK(8{65zPIj@IJ?v#4`#Hct4sn<x9OW3tIl)Ozahfxn<s9d^z(p=`
znJZl78rQkOO>S|UJKW_S_j$lW9`TqbJmneBdBICw@tQZh<sI+&z(+punJ;|h8{hfC
zPk!;6Km6q%0cPu;fCM5iK?q7Pf)j#}gd#Ly2unD^6M=|CA~I2kN;IMqgP6o3HgSkc
zJmQmpgd`#{Nk~dEl9Pgzq#`wGNJ~1>lYxw6A~RXYN;a~SgPi0dH+jfQKB5GO(*JOz
zC<h;gNF8PW_JE#2qXd=3!w~-YzX1OW6Ffjb0uh)X1SJ^32|-9g5t=ZBB^=?2Ktv)D
znJ7dh8qtYCOkxq6IK(9$@ku~J5|NlBBqbTiNkK|dk(xB5B^~L>Kt?i=nJi=_8`;T0
zPI8f(Jme)G`6)m_3Q?FM6r~u&DM3j}QJON8r5xp{Kt(E1nJQGJ8r7*mO=?k_I@F~e
z^=Uvu8qt_0G^H8MX+cX`(V8~2r5)|*Ku0>!nJ#pt8{O$aPkPatKJ=v@{TaYO1~Hf+
z3}qO@8No<KF`6-qWgO#~z(gi7nJG+V8q=A<OlC2gIm~4q^I5<`7O|KmEM*zXS;0zH
zv6?lkWgY9;z(zK)nJsK(8{65zPIj@IJ?v#4`#Hct4sn<x9OW3tIl)Ozahfxn<s9d^
zz(p=`nJZl78rQkOO>S|UJKW_S_j$lW9`TqbJmneBdBICw@tQZh<sI+&z(+punJ;|h
z8{hfCPk!;6Km6q%0p9%huYWrfq!5KELQ#rQoD!6z6s0LcS;|qK3RI*Lm8n8is!^R9
z)T9=*sY6}rQJ)4hq!Ep2LQ|U2oEEgC6|HGQTiVf{4s@gwo#{eXy3w5;^rRQP=|f-o
z(VqbfWDtWH!cc}WoDqy<6r&l#SjI7)2~1=XlbOO)rZJrv%w!g`nZsP>F`or2WD$#5
z!cvy8oE5BO6{}gpTGp|i4Qyl+o7uuvwy~WZ>|__a*~4D;v7ZAR<Pe8B!cmTKoD-bn
z6sI}ES<Z2u3tZ$9m$|}Ku5q0k+~gLwxx-!Vai0e~<Pnc~!c(5{oEN<06|Z^2Ti)@W
z4}9bkpZUU9zVV$O{NxwE`NLoS5g_J=aQ__uLlmSCg(*T&icy>rl%y1;DMMMxQJxA^
zq!N{>LRG3!of_1n7PYBEUFuPv1~jA*jcGztn$esVw4@cSX+vAu(Vh-;q!XR#LRY%c
zogVb07rp62U;5FX0SsgigBik5hB2HGjARs}8N*n{F`fxbWD=8^!c?X)of*tz7PFbd
zT;?&K1uSF{i&?@_ma&`_tYj6dS;Jb^v7QZVWD}d&!dAAiogM6C7rWWRUiPt{103WK
zhdIJgj&Yn5oa7XzIm21bah?lY<Pw*;!d0$uog3Wb7Pq;>UG8z82R!5vk9opVp7ER)
zyyO+HdBa=Y@tzNS<P)Fy!dJfWoge(<7r*(#U;Ysw=8I_m9RNcVq!5KELQ#rQoD!6z
z6s0LcS;|qK3RI*Lm8n8is!^R9)T9=*sY6}rQJ)4hq!Ep2LQ|U2oEEgC6|HGQTiVf{
z4s@gwo#{eXy3w5;^rRQP=|f-o(VqbfWDtWH!cc}WoDqy<6r&l#SjI7)2~1=XlbOO)
zrZJrv%w!g`nZsP>F`or2WD$#5!cvy8oE5BO6{}gpTGp|i4Qyl+o7uuvwy~WZ>|__a
z*~4D;v7ZAR<Pe8B!cmTKoD-bn6sI}ES<Z2u3tZ$9m$|}Ku5q0k+~gLwxx-!Vai0e~
z<Pnc~!c(5{oEN<06|Z^2Ti)@W4}9bkpZUU9zVV$O{NxwE`NLoS5g_J+c>f&$LlmSC
zg(*T&icy>rl%y1;DMMMxQJxA^q!N{>LRG3!of_1n7PYBEUFuPv1~jA*jcGztn$esV
zw4@cSX+vAu(Vh-;q!XR#LRY%cogVb07rp62U;5FX0SsgigBik5hB2HGjARs}8N*n{
zF`fxbWD=8^!c?X)of*tz7PFbdT;?&K1uSF{i&?@_ma&`_|8MWUzq)?!gO78kCN*ty
zcckAm`8I9Sk<m77nvSGt)241*h`1*rZo!3$Tfn{d#=SQ<!Ht4@p}2A3-s0Ze58L}b
z=l&1!!{_4oa2)yg!*zVVcwe7$t_xPNnl-Ft9qY*=n+<Ga6Pww>R&vN?8{65zPIj@I
zJ>;>MedJTXehzSuLmcJ^M>)oEPH>V!PH~zd&Ty7<oaX`;xkNFSxx!Vhah(!waFbiy
z<_>qc$9*2~kVib`2~R2I8P9paOJ4DsH@xK?@A<$-K2b*0mlu8*27raANF^#$g{o8|
zn(EY`Cbg(d40VX5E^*YOJ`HF{BO23$rZl5DEoezRtw^9XiL{|D?PyO2I?{>GB#}%P
zy3&pA^dN<v^rAPZ^r0{P=+6KKGKj&Xkxm9f7|Jk)GlG$fVl-nI%Q(g}fr(6FGE<n!
zG^R6ynapA~bI4>a^O(;97P5%NEMY0jSk4MovWnHLVJ+)ePZrs1U?ZE@%oetiLoVCc
z&JK36i{0!YkG<?8p91!CfP)<3Fh@AbF^+SBlN54_(-d)rvz+5R7r4kJin+`cu5yj*
zlyHNa+~PKOxXV56^MHpu;xSKnN-58H&I?}hir2j1E$?{G2R`zNGNQgL{7D!97NR1R
zs7w{AQjKV;Q-hk+qBb$qA(pztQIGmGpdpQDOcR>YjOMhUCGoT(fz~9_hPJe$Jss#s
zCpwcvGF|9OH@ee<6nfH&-lWopzVxF%0~p9429riQ84O`4!x+v8Mly=gjA1O}7|#SI
zGKtAdVJg#@&J1QUi`mQ}lex@eJ_}gLA{MiRr7UAPD_F@YR<nk+tYbY{WV3;dY+^H8
z*h&t$Y-2k+*vT$-vxhwPvX6WU*v|nDa)`qm;V8#A&IwLZ$SF=!#2L<Vj`LjLB9|!U
zGFQ0DHLg>_4Q_Ia+uY$U_qfjk9`cCCJmD#&JmWbpc*!eX^M<#)<2@hv$S2B(`ts1v
z!vL@l6{$pJs!)|`L{ps_)T9=*iJ=a$)FqC3)TaRrX+&e1(3EC0rv)vErxgjbCXqI@
zr5)|*Ku0>!nIw|wLRY%cogSpnlV0>Dl|J;PAN?7?Kn5|GG}6gn2tygha7HkaQH*8`
zV;RSICNPmnOlAsGnZ|TxFq2u#W)7LmWghccz(N+Wm?bP_8OvG0N>;I&HLPVF>&YUU
z4Qyl+o7uuva>!*H+u6ZRcCnj1<gu51<Ws<Y4seh|9Oei|ImU5LaFRk!ahf8|aF%nN
z=K>eGL@}4S!d0$uof2+vlUv;84tKf7eID?TM?B^UPbuXY&w0U1Uh$eYyyYG5`M^g$
zQAX64dA|w+z(Q1{5|yb!RjLt9b!t$PTGS?nI>b_!IO<WK1~jA*jcGztn$esVv?QKZ
zB+!~f+R&DEw5J0d=|pFeNTv&2=|*>YkU~#-(VJBI(3gJnX8;2k#9-1$CxamjWf;R5
z!AM3inlX%J9OIe5L?$trDNJP=)0x3cW-*&NWHOg|%x3`$S;S(Nu#{yiX9X)+#cI~D
zmUXNri)=Qqkxgu73tPz{mu+lk2RqrtZuXGJUiOhs0sA??K@M@4BOK)z$2q}C3OU7T
zia5hr&T*a#T;vkPT;>W_xyE%$xWP?sahp5b<sSEWz(XGKm?u1?lxIBW1uuEUYu@md
zcf98VANfQXQD5d(3j@GHRHPD>sX|q%5lwY!P?K8JCWbo1QkOXDQJ)4hq!Ep2LQ|U2
zoEEeso>nB#nnc>rmUgtK10Cr^XOc*!3tj0(cY2UQPkPatRQk}De)MMm0~y3%(nu$R
zAq-_0!x_OyMlqT(jAb0-nZQIQF_|e$Wg63&!Axc`n>l1MmwC)*0Sj5gVwSL!Wh`d}
zD_O;A*07d!tS5_XHn5RRY-S5v$sw0*Y-a~M*~M=5kjGy3kxv2pIlw^<ahM|<<rv2~
z!AS}^#c7H-!&%O8o(o*$62)BR3Rk(tbxOFwO>S|UJKW_S_j$lW9`TqbJf)OpJm&>3
zdBtnq@RoPH=K~-4L>W<EX4eS=z(Q1{5|yb!RjLt9b!t$PTGS?nI>b_!IO<WK1~jA*
zjcGztn$esVv?QKZB+!~f+R&DEw5J0d=|pFeNTv&2=|*>YkU~#-(VJBI(3gJnX8;2k
z#9-1$CxamjWf;R5!AM3inlX%J9OIe5L?$trDNJP=)0x3cW-*&NWHOg|%x3`$S;S(N
zu#{yiX9X)+#cI~DmUXNri)=Qqkxgu73tPz{mu+lk2RqrtZuXGJUiOhs0sA??K@M@4
zBOK)z$2q}C3OU7Tia5hr&T*a#T;vkPT;>W_xyE%$xWP?sahp5b<sSEWz(XGKm?u1?
zlxIBW1uuEUYu@mdcf98VANfQXQD3fX6b68Ws7NI$Q-!KjBbw^epeD7bO$>F2r7m&Q
zqdpC2NFy54gr+p3IW1^OJgrEeHHoyLE$wJe2RhP;&Lojc7rN4o?(`sqp7f$Osq~>Q
z{pimC1~Q1jq>)YrLm0|1hBJbZjAArn7|S@uGl7XrVlq>h$~2}kgPF`?Hgm{iF7uer
z0v57}#Vlbd%UI3|R<erKtYIzdSWgz&Y+xgs*vuBTl0z=r*v<}ivWwm9A&<T6BcB5H
zbAW>!;xI=z$}x^}f|C?-iqjNvhO?aGJQujgC5pMs6|QoP>y&VVo800yceu+v?(=|$
zJmN7=cuFbHc+Lx6@`~5I;VtiY&j&v8i87+TTo@k)fQ6_?B`Q;es#GJI>eQenwWv)D
zb%>=danz$e4QNOs8q<WPG^05!Xh}S+NT4-|w4p8SXio<^(uvL_kxUo5(v9x)Acdav
zqBp7Zp)dXD&j1E8h{2?hP6k65$}omAf{~13G-DXcIL0%9iA-WLQ<%y$rZa<?%wjfk
z$Yd__n9l+hvWUejVJXX4&I(qtiq))PE$diM7TIiIBb(UF7PgW@F5B474tBDO-RvQc
zz3d~O0`_x&gB;>8M>xtcj&p*O6mp8w6mf>LoZ~zfxX2}nxy%)=a*gYhaD$uN;x>1<
z%RTP%fQLNdF;93(DbIM$3tsYy*Sz5^?|9D#KJtk&Twl;W3;+vJkxEpi3RS5_G}Wm=
zO=?k_80rv9UE-)meHze^Ml_}gO=(7RTF{bsT9H6&5@|zQ+R>g4bfgoVNg|mpbfp{J
z=|KuT=|yi+=|f-o(VqbfWDtW%Bb^L}FqB~oX9Ob|#c0MbmT`<{0u!0UWTr5cX-sDZ
zGnvI~=8(x;<}sfIEMyUjS;A75v78mGWEHDf!&=s{o-DH2z(zK)nJsK3hg`O?ogM6C
z7rWU*9(&nGJ_YRO00%k5VUBQ=V;tuMCn@9<rzzqLXF11tE^v`c6myv?T;&?qDd7e;
zxy5bnaF=`B=K&9S#ABZDlv1AYoEN<06|Z^2Ti)@W4}9bkW%&LjUBdvd5EZFJWvWn>
zYD80=8q}l~wTYn)vD77wdeo-@4QWJUn$VPHG^YhEiKi6_v?h@@w51*G=|D$1(U~NY
z=|We!(VZTo(34*DCY3()r62tnz(58um^9MKU<gAQ#&AY3l2MFi3}YF`cqTBBNla!6
zQ<=teW-yak%w`Um%w-<)S-?UTv6v++Wf{v^!Ae%Knl-Ft9qY*=n+<Ga6Pww>R&vN?
z8{65zPIj@IJ>;>MedJTXehzSuLmcJ^M>)oEPH>V!PH~zd&Ty7<oaX`;xkNFSxx!Vh
zah(!waFbiy<_>qc$9*2~kVib`2~R2I8P9paOJ4DsH@xK?@A<$-K2b*0m!nd{0I(1h
zsYGR}P?c&#Q=J;rq!zV_p$@UsC60R3rvVLVL}QxJlx8%i1ucoE6$!K^kv6oY9qs8r
zM>^4&B$DYuSGv)i9;DEdUi2oFKJ=v@{TaYO1~Hg4(#c>5Lm9?!Mlh05jAjgD8OL}g
zFp)`2W(rf8#&l*dlUdAW4w=kl9`jkiLKd-@B`jqb%UQunR<W8jtYsbR$s(H#Y-AIg
z*}_(G$YmSb*}+bBv70^Qv6p@1Q^0->aF9bB<_JeQ#&J$?l0r^#nj+3{mUEov0vEYN
zF_*c*RjzTJ5^ivlTioUjce%%X9`KMyJmv{cDdic@dBICw@tQZh<sI+&z(+n&_Kot6
z^GAI(+y_v>YE+~Wm8n8isu4|fYEY9})Fy^H#8Q_y>QSEtG^7!YX+l$)(VP~vB%W3z
z(3(Wr(3W<zrvn}7L}!vnrVCx^Mt6FULQi_pn^gMHmwxnT00SAsVA4n@gCPuM7{eLC
zNJcT5F^pv#<C(xjCNY^QOl2C=nZZnEF`GGLGM9PGX8{XY#A24Plw~Yu1uI#_YSyrp
zb*v|gY&NiwO>AZhTgf4pZER-;JK4o<_K?S3_K{Bk`#Hct4sn<x9OW3tIl)N^ImKy;
zIKx@aah?lY<PybP<_cH2#&t@#!A)*)n>*a)9`|{`Lmu&%Cp@K;XFTTxFL}jl-td-p
zyypWS`9#?_%9o4!{aa<_!ZU!?s7NI$Q-!KjBbw^epeD7bO$>F2r7m&QqdpC2NFy54
zgr+p3IW1^OJgrEeHHoyLE$wJe2RhP;&Lojc7rN4o?(`sqp7f$Osq~>Q{pimC1~Q1j
zq>)YrLm0|1hBJbZjAArn7|S@uGl7XrVlq>h$~2}kgPF`?Hgm{iF7uer0v57}#Vlbd
z%UI3|R<erKtYIzdSWgz&Y+xgs*vuBTl0z=r*v<}ivWwm9A&<T6BcB5HbAW>!;xI=z
z$}x^}f|C?-iqjNvhO?aGJQujgC5pMs6|QoP>y&VVo800yceu+v?(=|$JmN7=cuFbH
zc+Lx6@`~5I;VtiY&j&v8iL%cV;19n1?^}Pq;?KW9Ilju*_)W_5b$*NA=6Co8|C`_C
z_xOGOfN%1Le8IQ)Bfiad_>w>7yZi}%%AfH){+z$yFZnCJ&ky(^KjN?X8~&Cb^LP9`
zKj9zvDgVen@z4AVKjY{8E5G31_;-HEfAF9DivOYl6{$pJs!)|`L{ps_)T9=*iJ=a$
z)FqC3)TaRrX+&e1(3EC0rv)vErxgjbCXqI@r5)|*Ku0>!nIw|wLRY%cogSpnlV0>D
zl|J;PAN?7?Kn5|GG}6gn2tygha7HkaQH*8`V;RSICNPmnOlAsGnZ|TxFq2u#W)7Lm
zWghccz(N+Wm?bP_8OvG0N>;I&HLPVF>&YUU4Qyl+o7uuva>!*H+u6ZRcCnj1<gu51
z<Ws<Y4seh|9Oei|ImU5LaFRk!ahf8|aF%nN=K>eGL@}4S!d0$uof2+vlUv;84tKf7
zeID?TM?B^UPbuXY&w0U1Uh$eYyyYG5`M^g$QTDn2Z-)LWSc{5OqB2#eN;RUXP7P{O
zi`v9chgj+oM?LD(fQB@pF->SnGn&(amc-MF1X`0w8`{#2_H>{lo#;#w$#kJB-RMpa
zQs_x9dXq{Y`qGd73}7IG7)%=JWH5xG3}ZMW7|AF`GlsE@V>}a>$Rs8+g{e$qIy0Eb
zEM_x@Oy)9=`7B@|i&)GOma>fHtY9UpSj`&NvX1p+k<A7+vWd-XVJkW0vW@NRU?;oS
z%^vdD%RcfcU_S>q$RQ4MgrgkeI43wsA*VP^5ob8dInHx|i(I0Z%Ut0q*SJmzH@L|y
zZgYpb+~YnEc*r9j^Mt3A@{H%a;3cnk%^TkGj`w`vBcCYy-2Wej{wr9Eid3R9Rj5ie
zqNz>|YEp~Z#88J=>Jmpi>eGORG@>z0Xi77h(}I@7(~1OIlSmud(vJ3Ypd+2=OcKd-
zp)1|!P7hM(NiTYnN+0^tkNyl`AcGi88tG&(grN*$I3pOzC`L1ev5aFp6PU;(CNqVp
zOk+ATn8_?=Glxv(GLQKzU?GcG%o3KejODCgC97D?8rHIo^<<IF1~#&Z&1_*SIpng9
z?d)JDyV%Vh^4QBh@+n|H2RO(f4s(Q~9OF1AI7uO=I86~}ILkTCbAgLoqL|BE;VRd-
zP6;=-$t`Ykhr8V4J`Z@vBOddFr<C%H=e*!0uXxQH-tvz3eBdLWDEr+17oq<O)}kVn
zs7w{AQjKV;Q-hk+qBb$qA(pztQIGmGpdpQDOcR>YjOMhUCGoT(fz~9_hPJe$Jss#s
zCpwcvGF|9OH@ee<6nfH&-lWopzVxF%0~p9429riQ84O`4!x+v8Mly=gjA1O}7|#SI
zGKtAdVJg#@&J1QUi`mQ}lex@eJ_}gLA{MiRr7UAPD_F@YR<nk+tYbY{WV3;dY+^H8
z*h&t$Y-2k+*vT$-vxhwPvX6WU*v|nDa)`qm;V8#A&IwLZ$SF=!#2L<Vj`LjLB9|!U
zGFQ0DHLg>_4Q_Ia+uY$U_qfjk9`cCCJmD#&JmWbpc*!eX^M<#)<2@hv$S2A^_y4WX
ze+6q%kxEpi3RS5_G}Wm=O=?k_80rv9UE-)meHze^Ml_}gO=(7RTF{bsT9H6&5@|zQ
z+R>g4bfgoVNg|mpbfp{J=|KuT=|yi+=|f-o(VqbfWDtW%Bb^L}FqB~oX9Ob|#c0Mb
zmT`<{0u!0UWTr5cX-sDZGnvI~=8(x;<}sfIEMyUjS;A75v78mGWEHDf!&=s{o-DH2
zz(zK)nJsK3hg`O?ogM6C7rWU*9(&nGJ_YRO00%k5VUBQ=V;tuMCn@9<rzzqLXF11t
zE^v`c6myv?T;&?qDd7e;xy5bnaF=`B=K&9S#ABZDlv1AYoEN<06|Z^2Ti)@W4}9bk
zWuN>1qtJf^Yf+I(RHh15sYW!_sX<L@QJWa*5KCR+s7HMo(2zznrU^}HMsr%wl6YE?
zKx-0dLtEO>o(^=R6P-ySnJ#pt8{O$a3O(sXZ&K+)U;5FX0SsgigGnQu42Ce2VGL&k
zBN@eL#xRy~jAsH9nZ#tKFqLUcX9hEw#cbw~$z0|!p9L&r5sO*EQkJot6|7_xt69TZ
z*0G)}vf02!HnEv4Y$b<Wwy~WZ>|__a*+U+C*+)JF?B@UnImBU(aFk;l=L9Dy<P@hV
z;tXdw$9XPrkxLYFnJZl78rLb|1~<9IZSHWFd)(&%4|&96p74}Xp7ER)yyO+HdBa=Y
z@tzNS<P&9|`~P<6zk;=>NF^#$g{o8|n(EY`Cbg(d40VX5E^*YOJ`HF{BO23$rZl5D
zEoezRtw^9XiL{|D?PyO2I?{>GB#}%Py3&pA^dN<v^rAPZ^r0{P=+6KKGKj&Xkxm9f
z7|Jk)GlG$fVl-nI%Q(g}fr(6FGE<n!G^R6ynapA~bI4>a^O(;97P5%NEMY0jSk4Mo
zvWnHLVJ+)ePZrs1U?ZE@%oetiLoVCc&JK36i{0!YkG<?8p91!CfP)<3Fh@AbF^+SB
zlN54_(-d)rvz+5R7r4kJin+`cu5yj*lyHNa+~PKOxXV56^MHpu;xSKnN-58H&I?}h
zir2j1E$?{G2R`zNvd{g0C-h&zT2!PGm8n8isu4|fYEY9})Fy^H#8Q_y>QSEtG^7!Y
zX+l$)(VP~vB%W3z(3(Wr(3W<zrvn}7L}!vnrVCx^Mt6FULQi_pn^gMHmwxnT00SAs
zVA4n@gCPuM7{eLCNJcT5F^pv#<C(xjCNY^QOl2C=nZZnEF`GGLGM9PGX8{XY#A24P
zlw~Yu1uI#_YSyrpb*v|gY&NiwO>AZhTgf4pZER-;JK4o<_K?S3_K{Bk`#Hct4sn<x
z9OW3tIl)N^ImKy;IKx@aah?lY<PybP<_cH2#&t@#!A)*)n>*a)9`|{`Lmu&%Cp@K;
zXFTTxFL}jl-td-pyypWS`9#_0{=W?USFjcpsYGR}P?c&#Q=J;rq!zV_p$@UsC60R3
zrvVLVL}QxJlx8%i1ucoE6$!K^kv6oY9qs8rM>^4&B$DYuSGv)i9;DEdUi2oFKJ=v@
z{TaYO1~Hg4(#c>5Lm9?!Mlh05jAjgD8OL}gFp)`2W(rf8#&l*dlUdAW4w=kl9`jki
zLKd-@B`jqb%UQunR<W8jtYsbR$s(H#Y-AIg*}_(G$YmSb*}+bBv70^Qv6p@1Q^0->
zaF9bB<_JeQ#&J$?l0r^#nj+3{mUEov0vEYNF_*c*RjzTJ5^ivlTioUjce%%X9`KMy
zJmv{cDdic@dBICw@tQZh<sI+&z(+n&_PPH*4*gfK78R*PWvWn>YD80=8q}l~wTYn)
zvD77wdeo-@4QWJUn$VPHG^YhEiKi6_v?h@@w51*G=|D$1(U~NY=|We!(VZTo(34*D
zCY3()r62tnz(58um^9MKU<gAQ#&AY3l2MFi3}YF`cqTBBNla!6Q<=teW-yak%w`Um
z%w-<)S-?UTv6v++Wf{v^!Ae%Knl-Ft9qY*=n+<Ga6Pww>R&vN?8{65zPIj@IJ>;>M
zedJTXehzSuLmcJ^M>)oEPH>V!PH~zd&Ty7<oaX`;xkNFSxx!Vhah(!waFbiy<_>qc
z$9*2~kVib`2~R2I8P9paOJ4DsH@xK?@A<$-K2i3$|L=zWD_Dz)RH8Cfs7f`WsZI@Q
zQj6NeP={FR5=TAi(}0FFqA^WqN;8_%f|kV7iUeAdNE_PHj`nn*Bc13>63KL-E8XZ$
z4^rq!FM5+oANtad{tRFsgBVO2>0~g3p$ua<BN)jjMl*)7jAJ|#n8+k1Gli*4V>&aK
z$t-3whfL-&kNGTMA&Xed5|*-z<*Z;Ot60q%*0PTEWRcAVHnNG$Y+)-o<g$(J>|iIm
z*v%gD*vmfhDPTVbILILmbA+QD<2WZcNg<~=O%Z1}%Q?<-fs0(Cn9E$@D%ZG92{*XO
zEpBs%yWHbG4|vEU9`l5!l=6({yx=9Tc+DH$@{ad>;3J<X``rJZg#Ig7i;7gDGF7Nb
zHKM6b4Qf)0+Qd+YSn3i-J?hhdhBTrvO=wCpn$v=o#M6odT9Zf{+R~2pbf6=h=u8sH
zbfGKV=uQt(=t(bnlS&`@(vSWOU?77SOd9EAFodBDV>lxi$tXrMhOvxeJQJA6BqlS3
zsZ3)!GnmONW;2IO<}#1@EMOsvSj-ZZvW(@dU?r<q%^KFSj`d`b%?38IiOp<bD>>w{
zjqU7UC%f3q9`e}BKJqDGKL<F-Ar5ndqa5QnCpbwVr#MX!XE@6_&U1l_T%wrET;VF$
zxK0T-xXCSUbBDX!<30~~$Ri%}gr}79jOV=IC9inR8{YDc_k7?ZpD6p>|DT5bD_Dz)
zRH8Cfs7f`WsZI@QQj6NeP={FR5=TAi(}0FFqA^WqN;8_%f|kV7iUeAdNE_PHj`nn*
zBc13>63KL-E8XZ$4^rq!FM5+oANtad{tRFsgBVO2>0~g3p$ua<BN)jjMl*)7jAJ|#
zn8+k1Gli*4V>&aK$t-3whfL-&kNGTMA&Xed5|*-z<*Z;Ot60q%*0PTEWRcAVHnNG$
zY+)-o<g$(J>|iIm*v%gD*vmfhDPTVbILILmbA+QD<2WZcNg<~=O%Z1}%Q?<-fs0(C
zn9E$@D%ZG92{*XOEpBs%yWHbG4|vEU9`l5!l=6({yx=9Tc+DH$@{ad>;3J<X``rJZ
zh5jp8i;7gDGF7NbHKM6b4Qf)0+Qd+YSn3i-J?hhdhBTrvO=wCpn$v=o#M6odT9Zf{
z+R~2pbf6=h=u8sHbfGKV=uQt(=t(bnlS&`@(vSWOU?77SOd9EAFodBDV>lxi$tXrM
zhOvxeJQJA6BqlS3sZ3)!GnmONW;2IO<}#1@EMOsvSj-ZZvW(@dU?r<q%^KFSj`d`b
z%?38IiOp<bD>>w{jqU7UC%f3q9`e}BKJqDGKL<F-Ar5ndqa5QnCpbwVr#MX!XE@6_
z&U1l_T%wrET;VF$xK0T-xXCSUbBDX!<30~~$Ri%}gr}79jOV=IC9inR8{YDc_k7?Z
zpD6o$2f+8j9RT43V3+_AI|0f?{du?pAWVQT0U~$;{6*LURPa1hkxEpi3RS5_G}Wm=
zO=?k_80rv9UE-)meHze^Ml_}gO=(7RTF{bsT9H6&5@|zQ+R>g4bfgoVNg|mpbfp{J
z=|KuT=|yi+=|f-o(VqbfWDtW%BlI7^{r_d?KlC5o1BlS~0Ln%ERp>wTANr5j{=Xmk
z5B-P!Bewq^g#JVSq5p{O|A(Rf(0}MZV*CG5=s)xy`j6QD|2p&^`Vaj_Z2x}~`Vakw
z{v)>kzYYC|{zLx}+y9S4|DpfTf5i6xccK5#f9OAA`~UmUf9OB-AF=)aB=jHp5B*1I
z|GtTCxC0=Z08V!bFoPisWf;R5!AM3inlX%J9OIe5L?$trDNJP=)0x3cW-*&NWHOg|
z%x3`$S;S(Nu#{yiX9X)+#cI~DmUXNri)=Qqkxgu73tPz{mu+lk2RqrtZuXEzgie5R
zQ9liL0E7t;CO`yFfPV~ofC|0>R3zL15S{??{XZyRKL<F-Ar5ndqa5QnCpbwVr#MX!
zXE@6_&U1l_T%wrET;VF$xK0T-xXCSUbBDX!<30~~$Ri%}gr}79jOV=IC9inR8{YDc
z_k7?ZpD6o$58ywA{zLy^2N1D4fPW7ChyFwV5!?U2g#JVSq5p{O|7W59(0}MZV*CGj
z=s)xy`j6QD|26a<`Vaj_Z2!Lq{fGWT{}J2&zlHuo|Dpeg?f>6H|DpfTf5i6x%g}%5
zKlC55{r^YkKlC5^kJ$eIGxQ(&5B*1M|Gx_ThyFwV5!!$GsQ(Ig0E833=}v=X5bgjd
z9~E&Y04sz$0Kx<a6Ci@`0H_%D0AUXho&X|r4^S?uQs_VQANr5j{ws(6L;s=wi0!{h
z=s)xy`j6QDtA_qV|Dpeg?Y~;+KlC5^kJ$dBL;s=w(0|1CUp@36`Vaj_Z2vVv|DpfT
zf5i4*GxQ(&5B*1M|FuH@q5sf-#P(l1^dI^U{YPy7F~8RSzli!GE&rQeY$^HrkH6TH
z^A&&m+81Aoib^Z_din3gd_JA{=T4^m1%Jt3@qK>45BU*)&EN31{FuMv@A(P;z)$%{
z{)vC)U-%h6=U@2+|Hi-bOa6oZ<X8L`VK#)>5V5nNPC;3@sQ;ycukag`<Ewm)-=sWW
z=ePK6eur=HzxiE$kKgAH_$GhI7krC9;@f<OFZpA>%b)P4{2AXPOn~xHt*t{MZD>n7
z+S7rKbfPm!B-4eibfY^xNTDaa=uIkp=u1EPGk}2%VlZi>lR-Ei9C7D^%SXkA8v?>J
zK$rj#d_K5txFH}+fG`0fcml-zTHhfEdjQYg34}dB`KZO#VF^oF#&TA$l2xo`4QpA)
zda}r70~^`IX11`E9CF#lc6P9nUF>ELdF*8$`4q69103WKhdIJgj&Yn5oTQLboTi8~
zoaG$nxxhs(QOsqoaFuIZr-U2a<QBKN!(Hxip9ehT5s!JoQ%ZTpb6)V0SG?v8Z+XXi
zKJbxGlzpRoxu|-f{|eTkB9*926{=E=XsT0#n$)5;G1MWJy2MeB`ZS;+jc800n$nEs
zw4f#Nv?BChK5CJ52>t&W`mZ1Q5B-Pt03-J6e}m9}=s)xyvHdp;{fGWT{}J1NqtJio
zKlC55{WlK%hyFwV5!-*0(0}MZ^dGVPHx2!V{zLx}+kdmrf9OB-AF=&65B-P!L;n%m
ze~Zw6=s)xyvHiCU{fGWT{}J1NeCR*)ANr5b{vALGcL0PF!0ArkWH5xG3}ZMW7|AF`
zGlsE@V>}a>$Rs8+g{e$qIy0EbEM_x@Oy)9=`7B@|iwGxxehnvp62ct-VFH8+5W#1F
zTZcVB*aL(ofC$|Kl#5CX{fGWT{}J1No6vvgKlC55{kIMMhyFwV5!-*e(0}MZ^dGVP
zw-5b?{zLx}+kc19f9OB-AF=&+4E=}xL;n%mf2YuY=s)xyvHf=r{fGWT{}J1NQs_VQ
zANr5j{*y!hq5sf-#P;7M^dI^U{YPm3zL|VD0~}5OraOO|K{x{(f!_tRTQ~z8CP0_~
z5qt)?d)NbnJwSK@h|oPixu_nY|ImNvKVth&3H^uuL;n%mf6vf==s)xyvHkZ7{fGWT
z{}J1N@6dnfKlC55{ilZhL;s=wi0!{m=s)xy`j6QD`-c8Q|Dpeg?Z02>KlC5^kJ$eE
zhyFwVq5p{Oe?aIz^dI_<*!~BG{zLzv|A_5>Q0PDOANr5b{>w)V4tD^A6Ts==4uGf#
zodD&c(!w18VFH8+5W#l<q=!8~*aL(;K*T-)WQ6`h|Dpeg?SDw<KlC5^kJ$c)hW<nU
zq5p{Oe^}^0^dI_<*#3uy{zLzv|A_5>MCd>CANr5j{zr!XL;s=wi0yw==s)xy`j6QD
zM~D7H|Dpeg?SD+@KlC5^kJ$dlhW<nUq5p{Oe_ZH4^dI_<*#5_d{zLzv{|N2BeAI++
z2S7LhoF47~h}=5>CWboz!UPBtAcF4zm=yK^VGj`Y01>(eC>J$3^dI^U{YPy7Q$qit
z|ImNL_CGcBANmjdM{NJoLjR%v(0|1CKRxsx`Vaj_Z2vPt|DpfTf5i4bGxQ(&5B*1M
z|Fc5>q5sf-#P&Zs^dI^U{YPy7b3*^2|ImNL_MaKi{eSV5@BA|QYbl8x+9oG_Q7-C>
zuY5o1+h2`O?3(lsQD2n%f4~1})VI<~%YEm&-}&(mqQ2`D-;es*cmD5ZeevIa^{wLk
zZ+`ZJ^1u99)Q`&jDl;nT+vTGE=i~EB%Kd*Y`RNy5`TVbat9axOzg1N5%^%#%|K{fx
qmH+&r&mZ3||3AO}_s`$T8vVo1pB?p!AAdJ0%8#gQVk^Y`^#1^ckVs+x

literal 0
HcmV?d00001


From 08c0c5acc58136ee1c07696a02878fee2e379c09 Mon Sep 17 00:00:00 2001
From: vuule <vmilovanovic@nvidia.com>
Date: Fri, 18 Nov 2022 13:40:05 -0800
Subject: [PATCH 193/202] comment

---
 cpp/src/io/orc/stripe_enc.cu | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu
index a9a1c4ad861..9032e3d2502 100644
--- a/cpp/src/io/orc/stripe_enc.cu
+++ b/cpp/src/io/orc/stripe_enc.cu
@@ -1235,6 +1235,8 @@ __global__ void __launch_bounds__(1024)
                        ? results[ss.first_block + b].bytes_written
                        : src_len;
       uint32_t blk_size24{};
+      // Only use the compressed block if it's smaller than the uncompressed
+      // If compression failed, dst_len == src_len, so the uncompressed block will be used
       if (src_len < dst_len) {
         // Copy from uncompressed source
         src                                       = inputs[ss.first_block + b].data();

From 21ba312366cca512844da229cf4fcc1b97eb3b61 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Fri, 18 Nov 2022 16:37:43 -0600
Subject: [PATCH 194/202] Fix dask backend dispatch (#12203)

This PR fixes a failure being observed in `dask` upstream: https://github.com/dask/dask/issues/9676

Authors:
   - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
   - Richard (Rick) Zamora (https://github.com/rjzamora)
---
 python/dask_cudf/dask_cudf/backends.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py
index 49b5e725fed..c8e4e015d4a 100644
--- a/python/dask_cudf/dask_cudf/backends.py
+++ b/python/dask_cudf/dask_cudf/backends.py
@@ -470,7 +470,12 @@ class CudfBackendEntrypoint(DataFrameBackendEntrypoint):
 
         @staticmethod
         def from_dict(
-            data, npartitions, orient="columns", dtype=None, columns=None
+            data,
+            npartitions,
+            orient="columns",
+            dtype=None,
+            columns=None,
+            constructor=cudf.DataFrame,
         ):
 
             return _default_backend(
@@ -480,7 +485,7 @@ def from_dict(
                 orient=orient,
                 dtype=dtype,
                 columns=columns,
-                constructor=cudf.DataFrame,
+                constructor=constructor,
             )
 
         @staticmethod

From a8afc756d1daf87b8f9453756f4c73c3d86de924 Mon Sep 17 00:00:00 2001
From: vuule <vmilovanovic@nvidia.com>
Date: Fri, 18 Nov 2022 16:04:31 -0800
Subject: [PATCH 195/202] fix is_data_empty

---
 cpp/src/io/orc/reader_impl.cu | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu
index cc686edeeef..b7d73c7d1a2 100644
--- a/cpp/src/io/orc/reader_impl.cu
+++ b/cpp/src/io/orc/reader_impl.cu
@@ -967,7 +967,6 @@ table_with_metadata reader::impl::read(size_type skip_rows,
     // Association between each ORC column and its cudf::column
     _col_meta.orc_col_map.emplace_back(_metadata.get_num_cols(), -1);
     std::vector<orc_column_meta> nested_col;
-    bool is_data_empty = false;
 
     // Get a list of column data types
     std::vector<data_type> column_types;
@@ -1050,6 +1049,7 @@ table_with_metadata reader::impl::read(size_type skip_rows,
       size_t num_rowgroups    = 0;
       int stripe_idx          = 0;
 
+      bool is_data_empty = true;
       std::vector<std::pair<std::future<size_t>, size_t>> read_tasks;
       for (auto const& stripe_source_mapping : selected_stripes) {
         // Iterate through the source files selected stripes
@@ -1072,10 +1072,16 @@ table_with_metadata reader::impl::read(size_type skip_rows,
           if (total_data_size == 0) {
             CUDF_EXPECTS(stripe_info->indexLength == 0, "Invalid index rowgroup stream data");
 
-            auto const are_all_empty =
-              std::all_of(_col_meta.num_child_rows.begin(),
-                          _col_meta.num_child_rows.end(),
-                          [](auto col_num_rows) { return col_num_rows == 0; });
+            auto const are_all_empty = std::all_of(
+              thrust::make_counting_iterator(0ul),
+              thrust::make_counting_iterator(num_columns),
+              [&](auto col_idx) {
+                auto const col_stripe_num_rows =
+                  (level == 0)
+                    ? stripe_info->numberOfRows
+                    : _col_meta.num_child_rows_per_stripe[stripe_idx * num_columns + col_idx];
+                return col_stripe_num_rows == 0;
+              });
 
             auto const are_all_structs =
               std::all_of(column_types.begin(), column_types.end(), [](auto dtype) {
@@ -1085,7 +1091,8 @@ table_with_metadata reader::impl::read(size_type skip_rows,
             // In case ROW GROUP INDEX is not present and all columns are structs with no null
             // stream, there is nothing to read at this level.
             CUDF_EXPECTS(are_all_empty or are_all_structs, "Expected streams data within stripe");
-            is_data_empty = true;
+          } else {
+            is_data_empty = false;
           }
 
           stripe_data.emplace_back(total_data_size, stream);

From e670c1061e81ce4c1c49440a969a2dbe3f3a1a35 Mon Sep 17 00:00:00 2001
From: vuule <vmilovanovic@nvidia.com>
Date: Mon, 21 Nov 2022 14:31:04 -0800
Subject: [PATCH 196/202] remove assert; separate empty stripe and level

---
 cpp/src/io/orc/reader_impl.cu | 42 +++++++++--------------------------
 1 file changed, 11 insertions(+), 31 deletions(-)

diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu
index b7d73c7d1a2..0623e35741d 100644
--- a/cpp/src/io/orc/reader_impl.cu
+++ b/cpp/src/io/orc/reader_impl.cu
@@ -1049,7 +1049,7 @@ table_with_metadata reader::impl::read(size_type skip_rows,
       size_t num_rowgroups    = 0;
       int stripe_idx          = 0;
 
-      bool is_data_empty = true;
+      bool is_level_data_empty = true;
       std::vector<std::pair<std::future<size_t>, size_t>> read_tasks;
       for (auto const& stripe_source_mapping : selected_stripes) {
         // Iterate through the source files selected stripes
@@ -1069,37 +1069,16 @@ table_with_metadata reader::impl::read(size_type skip_rows,
                                                           stream_info,
                                                           level == 0);
 
-          if (total_data_size == 0) {
-            CUDF_EXPECTS(stripe_info->indexLength == 0, "Invalid index rowgroup stream data");
-
-            auto const are_all_empty = std::all_of(
-              thrust::make_counting_iterator(0ul),
-              thrust::make_counting_iterator(num_columns),
-              [&](auto col_idx) {
-                auto const col_stripe_num_rows =
-                  (level == 0)
-                    ? stripe_info->numberOfRows
-                    : _col_meta.num_child_rows_per_stripe[stripe_idx * num_columns + col_idx];
-                return col_stripe_num_rows == 0;
-              });
-
-            auto const are_all_structs =
-              std::all_of(column_types.begin(), column_types.end(), [](auto dtype) {
-                return dtype.id() == type_id::STRUCT;
-              });
-
-            // In case ROW GROUP INDEX is not present and all columns are structs with no null
-            // stream, there is nothing to read at this level.
-            CUDF_EXPECTS(are_all_empty or are_all_structs, "Expected streams data within stripe");
-          } else {
-            is_data_empty = false;
-          }
+          auto const is_stripe_data_empty = total_data_size == 0;
+          if (not is_stripe_data_empty) { is_level_data_empty = false; }
+          CUDF_EXPECTS(not is_stripe_data_empty or stripe_info->indexLength == 0,
+                       "Invalid index rowgroup stream data");
 
           stripe_data.emplace_back(total_data_size, stream);
           auto dst_base = static_cast<uint8_t*>(stripe_data.back().data());
 
           // Coalesce consecutive streams into one read
-          while (not is_data_empty and stream_count < stream_info.size()) {
+          while (not is_stripe_data_empty and stream_count < stream_info.size()) {
             const auto d_dst  = dst_base + stream_info[stream_count].dst_pos;
             const auto offset = stream_info[stream_count].offset;
             auto len          = stream_info[stream_count].length;
@@ -1177,7 +1156,7 @@ table_with_metadata reader::impl::read(size_type skip_rows,
             if (chunk.type_kind == orc::TIMESTAMP) {
               chunk.timestamp_type_id = _timestamp_type.id();
             }
-            if (not is_data_empty) {
+            if (not is_stripe_data_empty) {
               for (int k = 0; k < gpu::CI_NUM_STREAMS; k++) {
                 chunk.streams[k] = dst_base + stream_info[chunk.strm_id[k]].dst_pos;
               }
@@ -1214,7 +1193,8 @@ table_with_metadata reader::impl::read(size_type skip_rows,
                          });
         }
         // Setup row group descriptors if using indexes
-        if (_metadata.per_file_metadata[0].ps.compression != orc::NONE and not is_data_empty) {
+        if (_metadata.per_file_metadata[0].ps.compression != orc::NONE and
+            not is_level_data_empty) {
           auto decomp_data = decompress_stripe_data(chunks,
                                                     stripe_data,
                                                     *_metadata.per_file_metadata[0].decompressor,
@@ -1257,7 +1237,7 @@ table_with_metadata reader::impl::read(size_type skip_rows,
           out_buffers[level].emplace_back(column_types[i], n_rows, is_nullable, stream, _mr);
         }
 
-        if (not is_data_empty) {
+        if (not is_level_data_empty) {
           decode_stream_data(chunks,
                              num_dict_entries,
                              skip_rows,
@@ -1271,7 +1251,7 @@ table_with_metadata reader::impl::read(size_type skip_rows,
 
         // Extract information to process nested child columns
         if (nested_col.size()) {
-          if (not is_data_empty) {
+          if (not is_level_data_empty) {
             scan_null_counts(chunks, null_count_prefix_sums[level], stream);
           }
           row_groups.device_to_host(stream, true);

From cd6dff34c7ca0810d535ba3a0ee6b3fa3e788187 Mon Sep 17 00:00:00 2001
From: David Wendt <dwendt@nvidia.com>
Date: Mon, 21 Nov 2022 18:12:50 -0500
Subject: [PATCH 197/202] Workaround for CUB segmented-sort bug with boolean
 keys

---
 cpp/src/sort/segmented_sort.cu          |  2 +-
 cpp/tests/sort/segmented_sort_tests.cpp | 19 +++++++++++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/cpp/src/sort/segmented_sort.cu b/cpp/src/sort/segmented_sort.cu
index 685d8aa3ec1..dc87d5ea326 100644
--- a/cpp/src/sort/segmented_sort.cu
+++ b/cpp/src/sort/segmented_sort.cu
@@ -52,7 +52,7 @@ struct column_fast_sort_fn {
   static bool is_fast_sort_supported(column_view const& col)
   {
     return !col.has_nulls() and
-           (cudf::is_integral(col.type()) ||
+           ((cudf::is_integral(col.type()) && !cudf::is_boolean(col.type())) ||
             (cudf::is_fixed_point(col.type()) and (col.type().id() != type_id::DECIMAL128)));
   }
 
diff --git a/cpp/tests/sort/segmented_sort_tests.cpp b/cpp/tests/sort/segmented_sort_tests.cpp
index c1a742e63b8..ad905b6d04f 100644
--- a/cpp/tests/sort/segmented_sort_tests.cpp
+++ b/cpp/tests/sort/segmented_sort_tests.cpp
@@ -274,5 +274,24 @@ TEST_F(SegmentedSortInt, ErrorsMismatchArgSizes)
   CUDF_EXPECT_NO_THROW(cudf::segmented_sort_by_key(input1, input1, segments));
 }
 
+TEST_F(SegmentedSortInt, Bool)
+{
+  cudf::test::fixed_width_column_wrapper<bool> col1{
+    {true,  false, false, true, true,  true,  true, true, true,  true, true,  true, true, false,
+     false, false, false, true, false, false, true, true, true,  true, true,  true, true, false,
+     true,  false, true,  true, true,  true,  true, true, false, true, false, false}};
+
+  cudf::test::fixed_width_column_wrapper<int> segments{{0, 5, 10, 15, 20, 25, 30, 40}};
+
+  auto test_col = cudf::column_view{col1};
+  auto result   = cudf::segmented_sorted_order(cudf::table_view({test_col}), segments);
+
+  cudf::test::fixed_width_column_wrapper<int> expected(
+    {1,  2,  0,  3,  4,  5,  6,  7,  8,  9,  13, 14, 10, 11, 12, 15, 16, 18, 19, 17,
+     20, 21, 22, 23, 24, 27, 29, 25, 26, 28, 36, 38, 39, 30, 31, 32, 33, 34, 35, 37});
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(result->view(), expected);
+}
+
 }  // namespace test
 }  // namespace cudf

From f15080f32040fd96219408f8f38359c1c47c3dd2 Mon Sep 17 00:00:00 2001
From: vuule <vmilovanovic@nvidia.com>
Date: Mon, 21 Nov 2022 20:50:06 -0800
Subject: [PATCH 198/202] test

---
 .../data/orc/TestOrcFile.Hive.AllNulls.orc    | Bin 0 -> 293 bytes
 .../orc/TestOrcFile.Hive.EmptyListStripe.orc  | Bin 0 -> 311 bytes
 .../orc/TestOrcFile.Hive.NullStructStripe.orc | Bin 0 -> 292 bytes
 .../orc/TestOrcFile.Hive.OneEmptyList.orc     | Bin 0 -> 174 bytes
 .../data/orc/TestOrcFile.Hive.OneEmptyMap.orc | Bin 0 -> 156 bytes
 .../orc/TestOrcFile.Hive.OneNullStruct.orc    | Bin 0 -> 158 bytes
 python/cudf/cudf/tests/test_orc.py            |  19 ++++++++++++++++++
 7 files changed, 19 insertions(+)
 create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.AllNulls.orc
 create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.EmptyListStripe.orc
 create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.NullStructStripe.orc
 create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.OneEmptyList.orc
 create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.OneEmptyMap.orc
 create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.OneNullStruct.orc

diff --git a/python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.AllNulls.orc b/python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.AllNulls.orc
new file mode 100644
index 0000000000000000000000000000000000000000..1c661e1c6f067a61b5ad4d185afa7da8b85b35ff
GIT binary patch
literal 293
zcmZvWu?oUK42F}OZR52<I4m4>PzpjvSDl0oZWj6qcJ&>Ko15?A6ZlR(fbrT6g2#RL
z@h5Oe)7EnUOBU9ug-u{Mju=*2No^Dg|FU*0ApG2y)?$tYd=iY_1(TOx6AXer(oAZj
zMxkIDjbg^E{*G2?Vyh#>5_gJjK5z$MBTp=>YVVhuOVyRknL{k_TE?6@B2&&daWK5Q
c?x24tFmZgG`J@~4y_5u5mEYdRG;N)K0Fo^evH$=8

literal 0
HcmV?d00001

diff --git a/python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.EmptyListStripe.orc b/python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.EmptyListStripe.orc
new file mode 100644
index 0000000000000000000000000000000000000000..edc1094a18698bf0ae2f38a087740bf9476d1baf
GIT binary patch
literal 311
zcmYk$!AiqG5C-6XHg2743$9C%r6_2;gdUbig+fpidhujKPrXShK}EDNRYC9&d;*U?
zs5cL~$)R|d85m~fo9S$_4XBRUV9GEAcC_lNO@tJikde_Mur(k0ku4PfFw8K*B7+2e
z@eG{ost$KFY*#XqwUWPt?fV`yS?R0r)eB78?-|kTvuNVXU)@tJth$Xgm#wDO#t*=O
zejg~G$5!s2#uRDp8KOb>^iouBR}Xm^NmhnLBsdrzD7&m~Twdnaw}o4kd3Enzi)wYZ
wd^B?8MlMMxZN>Dzn6Ab1&&vzUX(5l?*d24-JYCm166nQI5`KIK!E`pUe-AM!`Tzg`

literal 0
HcmV?d00001

diff --git a/python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.NullStructStripe.orc b/python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.NullStructStripe.orc
new file mode 100644
index 0000000000000000000000000000000000000000..fe5f57af14c9bc1d8e327b4697ea2aed4a3e244c
GIT binary patch
literal 292
zcmX|5O-sW-5S`i0hUqrocIjaW7BpUphb5&gLQX<2o^0r;H;FZ<XiZE}>aF?<{IOn(
z?gr_=d_CU0H}l1;2OvWo5auv&V<ZD`;Q+|-%mGJ|`<)0hGmwJ-_=P)w$*~M6RA9P-
z2u)AWmYKf8K%o(?qjG%B{9T)D>eP&EojfharH}{NJpggFj-{)q5hv3jCW<1B7<XRZ
z%I4!{UDQd}BP<S9)$=G}#LrKqshWFR)Wzzdw41tUp6q+sY#yr@Eyi|i(-gDTGW)m8
k4wjeS*H^H_TQRa1c0!;2;oVLp45Hy6?R@=kK3~kt9}pKOP5=M^

literal 0
HcmV?d00001

diff --git a/python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.OneEmptyList.orc b/python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.OneEmptyList.orc
new file mode 100644
index 0000000000000000000000000000000000000000..53c323436d60cff3caba808616fa0741d897943f
GIT binary patch
literal 174
zcmeYda%N><U}ppoCoCBlxY#(D1Q;b)fi$B4lLV^}69)qjGl4Kr&3^_@uo?!a8jxC$
z9*|m)I=I?$1_lmhfe0xsAju%Xr68}t1!U_0*{llc8jMN;96Umdj8dG1rKv@g@wtfw
pN~|1QLX1pGOdQMx+#Gg5^BNiil$aQJG#Csxo4znJ`v*CT0|2sC5t#r0

literal 0
HcmV?d00001

diff --git a/python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.OneEmptyMap.orc b/python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.OneEmptyMap.orc
new file mode 100644
index 0000000000000000000000000000000000000000..1bb4079c492501d56ee033f301a196a9b6ac1a67
GIT binary patch
literal 156
zcmXAdF$=;l6ol`+B<*W$AY?Igl1v@5`2&ih6iRn-aBvV*aBwT)Pj=T7hr92-8^+!M
zU%Y{7stbXmU@|!qndI0A*b0=DY|4fzZxZn=q!W8B%HiK<Aj`h3mn)dfuA}Ir@kDKP
ldz|ji!{vC5g{(ZYiY(=lM6z`p=2=HZ(;=0wk5a?fuYXBV5QzW)

literal 0
HcmV?d00001

diff --git a/python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.OneNullStruct.orc b/python/cudf/cudf/tests/data/orc/TestOrcFile.Hive.OneNullStruct.orc
new file mode 100644
index 0000000000000000000000000000000000000000..a457b8285bd6626b96dcaefdd0f77559da356138
GIT binary patch
literal 158
zcmeYda%N><U_YVBz`(`E!6?8a!79YW!2rcfK)L@6mS8ys0Y(W{s2Z3&h-PYFU~u%|
zU>0ze;sTNk608d18eBlOJS$IxyauC^00)l{Bcl{&VQFenWqfX8ff6^PkO7kvOLBgW
ip%N1ZvjI1UGtlCO1_31|1|AIt1J0%|%*_5l&f);IoD&WJ

literal 0
HcmV?d00001

diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py
index 1699c11617a..48f1a49b7f4 100644
--- a/python/cudf/cudf/tests/test_orc.py
+++ b/python/cudf/cudf/tests/test_orc.py
@@ -1815,3 +1815,22 @@ def test_statistics_string_sum():
 
     file_stats, stripe_stats = cudf.io.orc.read_orc_statistics([buff])
     assert_eq(file_stats[0]["str"].get("sum"), sum(len(s) for s in strings))
+
+
+@pytest.mark.parametrize(
+    "fname",
+    [
+        "TestOrcFile.Hive.OneEmptyMap.orc",
+        "TestOrcFile.Hive.OneEmptyList.orc",
+        "TestOrcFile.Hive.OneNullStruct.orc",
+        "TestOrcFile.Hive.EmptyListStripe.orc",
+        "TestOrcFile.Hive.NullStructStripe.orc",
+        "TestOrcFile.Hive.AllNulls.orc",
+    ],
+)
+def test_reader_empty_stripe(datadir, fname):
+    path = datadir / fname
+
+    expected = pd.read_orc(path)
+    got = cudf.read_orc(path)
+    assert_eq(expected, got)

From 0c60819cf3268d9568a297010f77ae95900923bb Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 23 Nov 2022 05:41:40 -0800
Subject: [PATCH 199/202] Make dask pinning looser (#12231)

* Make pinning >=.

* Temporarily reenable wheel builds to ensure that things work as expected.

* Skip cudf tests and make sure dask-cudf builds.

* Undo changes to wheels scripts.
---
 python/dask_cudf/setup.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py
index d9d4da9c4ab..83b21a0c6f7 100644
--- a/python/dask_cudf/setup.py
+++ b/python/dask_cudf/setup.py
@@ -8,8 +8,8 @@
 cuda_suffix = os.getenv("RAPIDS_PY_WHEEL_CUDA_SUFFIX", default="")
 
 install_requires = [
-    "dask==2022.9.2",
-    "distributed==2022.9.2",
+    "dask>=2022.9.2",
+    "distributed>=2022.9.2",
     "fsspec>=0.6.0",
     "numpy",
     "pandas>=1.0,<1.6.0dev0",

From c83ff55a24ab146369b923cdb96e97a8813fc49f Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 28 Nov 2022 15:44:49 -0800
Subject: [PATCH 200/202] Fix include line for io/numpy.

---
 python/cudf/cudf/_lib/io/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/_lib/io/CMakeLists.txt b/python/cudf/cudf/_lib/io/CMakeLists.txt
index af5ffccd237..5bc7bb77525 100644
--- a/python/cudf/cudf/_lib/io/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/io/CMakeLists.txt
@@ -22,5 +22,5 @@ rapids_cython_create_modules(
 
 set(targets_using_numpy io_datasource io_utils)
 foreach(target IN LISTS targets_using_numpy)
-  target_include_directories(${target} PRIVATE "${Python_NumPy_INCLUDE_DIRS}")
+  target_include_directories(${target} PRIVATE "${NumPy_INCLUDE_DIRS}")
 endforeach()

From 297911f425ff08f005fada0291bcf5a9b1dc8729 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Thu, 1 Dec 2022 04:25:50 -0800
Subject: [PATCH 201/202] Pin to 2022.11.1

---
 ci/benchmark/build.sh                            | 2 +-
 ci/gpu/build.sh                                  | 2 +-
 conda/environments/all_cuda-115_arch-x86_64.yaml | 4 ++--
 conda/recipes/custreamz/meta.yaml                | 4 ++--
 conda/recipes/dask-cudf/meta.yaml                | 8 ++++----
 dependencies.yaml                                | 4 ++--
 python/dask_cudf/setup.py                        | 4 ++--
 7 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/ci/benchmark/build.sh b/ci/benchmark/build.sh
index 011f947beb0..e186946a3d0 100755
--- a/ci/benchmark/build.sh
+++ b/ci/benchmark/build.sh
@@ -40,7 +40,7 @@ export LIBCUDF_KERNEL_CACHE_PATH="$HOME/.jitify-cache"
 export INSTALL_DASK_MAIN=0
 
 # Dask version to install when `INSTALL_DASK_MAIN=0`
-export DASK_STABLE_VERSION="2022.11.0"
+export DASK_STABLE_VERSION="2022.11.1"
 
 function remove_libcudf_kernel_cache_dir {
     EXITCODE=$?
diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh
index 1d4911d71ab..13d62c7464a 100755
--- a/ci/gpu/build.sh
+++ b/ci/gpu/build.sh
@@ -35,7 +35,7 @@ unset GIT_DESCRIBE_TAG
 export INSTALL_DASK_MAIN=0
 
 # Dask version to install when `INSTALL_DASK_MAIN=0`
-export DASK_STABLE_VERSION="2022.11.0"
+export DASK_STABLE_VERSION="2022.11.1"
 
 # ucx-py version
 export UCX_PY_VERSION='0.29.*'
diff --git a/conda/environments/all_cuda-115_arch-x86_64.yaml b/conda/environments/all_cuda-115_arch-x86_64.yaml
index 1ee81f31854..cd900efced5 100644
--- a/conda/environments/all_cuda-115_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-115_arch-x86_64.yaml
@@ -21,8 +21,8 @@ dependencies:
 - cxx-compiler
 - cython>=0.29,<0.30
 - dask-cuda=22.12.*
-- dask==2022.11.0
-- distributed==2022.11.0
+- dask==2022.11.1
+- distributed==2022.11.1
 - dlpack>=0.5,<0.6.0a0
 - doxygen=1.8.20
 - fastavro>=0.22.9
diff --git a/conda/recipes/custreamz/meta.yaml b/conda/recipes/custreamz/meta.yaml
index 19d9728b234..b5a27cdac92 100644
--- a/conda/recipes/custreamz/meta.yaml
+++ b/conda/recipes/custreamz/meta.yaml
@@ -29,8 +29,8 @@ requirements:
     - python
     - streamz
     - cudf ={{ version }}
-    - dask==2022.11.0
-    - distributed==2022.11.0
+    - dask==2022.11.1
+    - distributed==2022.11.1
     - python-confluent-kafka >=1.7.0,<1.8.0a0
     - cudf_kafka ={{ version }}
 
diff --git a/conda/recipes/dask-cudf/meta.yaml b/conda/recipes/dask-cudf/meta.yaml
index 8f83053eab5..d97a8448a53 100644
--- a/conda/recipes/dask-cudf/meta.yaml
+++ b/conda/recipes/dask-cudf/meta.yaml
@@ -24,14 +24,14 @@ requirements:
   host:
     - python
     - cudf ={{ version }}
-    - dask==2022.11.0
-    - distributed==2022.11.0
+    - dask==2022.11.1
+    - distributed==2022.11.1
     - cudatoolkit ={{ cuda_version }}
   run:
     - python
     - cudf ={{ version }}
-    - dask==2022.11.0
-    - distributed==2022.11.0
+    - dask==2022.11.1
+    - distributed==2022.11.1
     - {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }}
 
 test:                                   # [linux64]
diff --git a/dependencies.yaml b/dependencies.yaml
index aeb680ebdb2..074ef3696c4 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -141,8 +141,8 @@ dependencies:
       - output_types: [conda, requirements]
         packages:
           - cachetools
-          - dask==2022.11.0
-          - distributed==2022.11.0
+          - dask==2022.11.1
+          - distributed==2022.11.1
           - fsspec>=0.6.0
           - numba>=0.56.2
           - numpy
diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py
index 83b21a0c6f7..651245c4a50 100644
--- a/python/dask_cudf/setup.py
+++ b/python/dask_cudf/setup.py
@@ -8,8 +8,8 @@
 cuda_suffix = os.getenv("RAPIDS_PY_WHEEL_CUDA_SUFFIX", default="")
 
 install_requires = [
-    "dask>=2022.9.2",
-    "distributed>=2022.9.2",
+    "dask==2022.11.1",
+    "distributed==2022.11.1",
     "fsspec>=0.6.0",
     "numpy",
     "pandas>=1.0,<1.6.0dev0",

From f471bcc0d284aeeab42efab0c501d1124cfa7efc Mon Sep 17 00:00:00 2001
From: Raymond Douglass <ray@raydouglass.com>
Date: Thu, 8 Dec 2022 09:46:47 -0500
Subject: [PATCH 202/202] update changelog

---
 CHANGELOG.md | 230 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 228 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8cf477e9fcf..b872e954d87 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,232 @@
-# cuDF 22.12.00 (Date TBD)
+# cuDF 22.12.00 (8 Dec 2022)
 
-Please see https://github.com/rapidsai/cudf/releases/tag/v22.12.00a for the latest changes to this development branch.
+## 🚨 Breaking Changes
+
+- Add JNI for `substring` without &#39;end&#39; parameter. ([#12113](https://github.com/rapidsai/cudf/pull/12113)) [@firestarman](https://github.com/firestarman)
+- Refactor `purge_nonempty_nulls` ([#12111](https://github.com/rapidsai/cudf/pull/12111)) [@ttnghia](https://github.com/ttnghia)
+- Create an `int8` column in `read_csv` when all elements are missing ([#12110](https://github.com/rapidsai/cudf/pull/12110)) [@vuule](https://github.com/vuule)
+- Throw an error when libcudf is built without cuFile and `LIBCUDF_CUFILE_POLICY` is set to `"ALWAYS"` ([#12080](https://github.com/rapidsai/cudf/pull/12080)) [@vuule](https://github.com/vuule)
+- Fix type promotion edge cases in numerical binops ([#12074](https://github.com/rapidsai/cudf/pull/12074)) [@wence-](https://github.com/wence-)
+- Reduce/Remove reliance on `**kwargs` and `*args` in `IO` readers &amp; writers ([#12025](https://github.com/rapidsai/cudf/pull/12025)) [@galipremsagar](https://github.com/galipremsagar)
+- Rollback of `DeviceBufferLike` ([#12009](https://github.com/rapidsai/cudf/pull/12009)) [@madsbk](https://github.com/madsbk)
+- Remove unused `managed_allocator` ([#12005](https://github.com/rapidsai/cudf/pull/12005)) [@vyasr](https://github.com/vyasr)
+- Pass column names to `write_csv` instead of `table_metadata` pointer ([#11972](https://github.com/rapidsai/cudf/pull/11972)) [@vuule](https://github.com/vuule)
+- Accept const refs instead of const unique_ptr refs in reduce and scan APIs. ([#11960](https://github.com/rapidsai/cudf/pull/11960)) [@vyasr](https://github.com/vyasr)
+- Default to equal NaNs in make_merge_sets_aggregation. ([#11952](https://github.com/rapidsai/cudf/pull/11952)) [@bdice](https://github.com/bdice)
+- Remove validation that requires introspection ([#11938](https://github.com/rapidsai/cudf/pull/11938)) [@vyasr](https://github.com/vyasr)
+- Trim quotes for non-string values in nested json parsing ([#11898](https://github.com/rapidsai/cudf/pull/11898)) [@karthikeyann](https://github.com/karthikeyann)
+- Add tests ensuring that cudf&#39;s default stream is always used ([#11875](https://github.com/rapidsai/cudf/pull/11875)) [@vyasr](https://github.com/vyasr)
+- Support nested types as groupby keys in libcudf ([#11792](https://github.com/rapidsai/cudf/pull/11792)) [@PointKernel](https://github.com/PointKernel)
+- Default to equal NaNs in make_collect_set_aggregation. ([#11621](https://github.com/rapidsai/cudf/pull/11621)) [@bdice](https://github.com/bdice)
+- Removing int8 column option from parquet byte_array writing ([#11539](https://github.com/rapidsai/cudf/pull/11539)) [@hyperbolic2346](https://github.com/hyperbolic2346)
+- part1: Simplify BaseIndex to an abstract class ([#10389](https://github.com/rapidsai/cudf/pull/10389)) [@skirui-source](https://github.com/skirui-source)
+
+## 🐛 Bug Fixes
+
+- Fix include line for IO Cython modules ([#12250](https://github.com/rapidsai/cudf/pull/12250)) [@vyasr](https://github.com/vyasr)
+- Make dask pinning looser ([#12231](https://github.com/rapidsai/cudf/pull/12231)) [@vyasr](https://github.com/vyasr)
+- Workaround for CUB segmented-sort bug with boolean keys ([#12217](https://github.com/rapidsai/cudf/pull/12217)) [@davidwendt](https://github.com/davidwendt)
+- Fix `from_dict` backend dispatch to match upstream `dask` ([#12203](https://github.com/rapidsai/cudf/pull/12203)) [@galipremsagar](https://github.com/galipremsagar)
+- Merge branch-22.10 into branch-22.12 ([#12198](https://github.com/rapidsai/cudf/pull/12198)) [@davidwendt](https://github.com/davidwendt)
+- Fix compression in ORC writer ([#12194](https://github.com/rapidsai/cudf/pull/12194)) [@vuule](https://github.com/vuule)
+- Don&#39;t use CMake 3.25.0 as it has a show stopping FindCUDAToolkit bug ([#12188](https://github.com/rapidsai/cudf/pull/12188)) [@robertmaynard](https://github.com/robertmaynard)
+- Fix data corruption when reading ORC files with empty stripes ([#12160](https://github.com/rapidsai/cudf/pull/12160)) [@vuule](https://github.com/vuule)
+- Fix decimal binary operations ([#12142](https://github.com/rapidsai/cudf/pull/12142)) [@galipremsagar](https://github.com/galipremsagar)
+- Ensure dlpack include is provided to cudf interop lib ([#12139](https://github.com/rapidsai/cudf/pull/12139)) [@robertmaynard](https://github.com/robertmaynard)
+- Safely allocate `udf_string` pointers in `strings_udf` ([#12138](https://github.com/rapidsai/cudf/pull/12138)) [@brandon-b-miller](https://github.com/brandon-b-miller)
+- Fix/disable jitify lto ([#12122](https://github.com/rapidsai/cudf/pull/12122)) [@robertmaynard](https://github.com/robertmaynard)
+- Fix conditional_full_join benchmark ([#12121](https://github.com/rapidsai/cudf/pull/12121)) [@GregoryKimball](https://github.com/GregoryKimball)
+- Fix regex working-memory-size refactor error ([#12119](https://github.com/rapidsai/cudf/pull/12119)) [@davidwendt](https://github.com/davidwendt)
+- Add in negative size checks for columns ([#12118](https://github.com/rapidsai/cudf/pull/12118)) [@revans2](https://github.com/revans2)
+- Add JNI for `substring` without &#39;end&#39; parameter. ([#12113](https://github.com/rapidsai/cudf/pull/12113)) [@firestarman](https://github.com/firestarman)
+- Fix reading of CSV files with blank second row ([#12098](https://github.com/rapidsai/cudf/pull/12098)) [@vuule](https://github.com/vuule)
+- Fix an error in IO with `GzipFile` type ([#12085](https://github.com/rapidsai/cudf/pull/12085)) [@galipremsagar](https://github.com/galipremsagar)
+- Workaround groupby aggregate thrust::copy_if overflow ([#12079](https://github.com/rapidsai/cudf/pull/12079)) [@davidwendt](https://github.com/davidwendt)
+- Fix alignment of compressed blocks in ORC writer ([#12077](https://github.com/rapidsai/cudf/pull/12077)) [@vuule](https://github.com/vuule)
+- Fix singleton-range `__setitem__` edge case ([#12075](https://github.com/rapidsai/cudf/pull/12075)) [@wence-](https://github.com/wence-)
+- Fix type promotion edge cases in numerical binops ([#12074](https://github.com/rapidsai/cudf/pull/12074)) [@wence-](https://github.com/wence-)
+- Force using old fmt in nvbench. ([#12067](https://github.com/rapidsai/cudf/pull/12067)) [@vyasr](https://github.com/vyasr)
+- Fixes List offset bug in Nested JSON reader ([#12060](https://github.com/rapidsai/cudf/pull/12060)) [@karthikeyann](https://github.com/karthikeyann)
+- Allow falling back to `shim_60.ptx` by default in `strings_udf` ([#12056](https://github.com/rapidsai/cudf/pull/12056)) [@brandon-b-miller](https://github.com/brandon-b-miller)
+- Force black exclusions for pre-commit. ([#12036](https://github.com/rapidsai/cudf/pull/12036)) [@bdice](https://github.com/bdice)
+- Add `memory_usage` &amp; `items` implementation for `Struct` column &amp; dtype ([#12033](https://github.com/rapidsai/cudf/pull/12033)) [@galipremsagar](https://github.com/galipremsagar)
+- Reduce/Remove reliance on `**kwargs` and `*args` in `IO` readers &amp; writers ([#12025](https://github.com/rapidsai/cudf/pull/12025)) [@galipremsagar](https://github.com/galipremsagar)
+- Fixes bug in csv_reader_options construction in cython ([#12021](https://github.com/rapidsai/cudf/pull/12021)) [@karthikeyann](https://github.com/karthikeyann)
+- Fix issues when both `usecols` and `names` options are used in `read_csv` ([#12018](https://github.com/rapidsai/cudf/pull/12018)) [@vuule](https://github.com/vuule)
+- Port thrust&#39;s pinned_allocator to cudf, since Thrust 1.17 removes the type ([#12004](https://github.com/rapidsai/cudf/pull/12004)) [@robertmaynard](https://github.com/robertmaynard)
+- Revert &quot;Replace most of preprocessor usage in nvcomp adapter with `constexpr`&quot; ([#11999](https://github.com/rapidsai/cudf/pull/11999)) [@vuule](https://github.com/vuule)
+- Fix bug where `df.loc` resulting in single row could give wrong index ([#11998](https://github.com/rapidsai/cudf/pull/11998)) [@eriknw](https://github.com/eriknw)
+- Switch to DISABLE_DEPRECATION_WARNINGS to match other RAPIDS projects ([#11989](https://github.com/rapidsai/cudf/pull/11989)) [@robertmaynard](https://github.com/robertmaynard)
+- Fix maximum page size estimate in Parquet writer ([#11962](https://github.com/rapidsai/cudf/pull/11962)) [@vuule](https://github.com/vuule)
+- Fix local offset handling in bgzip reader ([#11918](https://github.com/rapidsai/cudf/pull/11918)) [@upsj](https://github.com/upsj)
+- Fix an issue reading struct-of-list types in Parquet. ([#11910](https://github.com/rapidsai/cudf/pull/11910)) [@nvdbaranec](https://github.com/nvdbaranec)
+- Fix memcheck error in TypeInference.Timestamp gtest ([#11905](https://github.com/rapidsai/cudf/pull/11905)) [@davidwendt](https://github.com/davidwendt)
+- Fix type casting in Series.__setitem__ ([#11904](https://github.com/rapidsai/cudf/pull/11904)) [@wence-](https://github.com/wence-)
+- Fix memcheck error in get_dremel_data ([#11903](https://github.com/rapidsai/cudf/pull/11903)) [@davidwendt](https://github.com/davidwendt)
+- Fixes Unsupported column type error due to empty list columns in Nested JSON reader ([#11897](https://github.com/rapidsai/cudf/pull/11897)) [@karthikeyann](https://github.com/karthikeyann)
+- Fix segmented-sort to ignore indices outside the offsets ([#11888](https://github.com/rapidsai/cudf/pull/11888)) [@davidwendt](https://github.com/davidwendt)
+- Fix cudf::stable_sorted_order for NaN and -NaN in FLOAT64 columns ([#11874](https://github.com/rapidsai/cudf/pull/11874)) [@davidwendt](https://github.com/davidwendt)
+- Fix writing of Parquet files with many fragments ([#11869](https://github.com/rapidsai/cudf/pull/11869)) [@etseidl](https://github.com/etseidl)
+- Fix RangeIndex unary operators. ([#11868](https://github.com/rapidsai/cudf/pull/11868)) [@vyasr](https://github.com/vyasr)
+- JNI Avoid NPE for reading host binary data ([#11865](https://github.com/rapidsai/cudf/pull/11865)) [@revans2](https://github.com/revans2)
+- Fix decimal benchmark input data generation ([#11863](https://github.com/rapidsai/cudf/pull/11863)) [@karthikeyann](https://github.com/karthikeyann)
+- Fix pre-commit copyright check ([#11860](https://github.com/rapidsai/cudf/pull/11860)) [@galipremsagar](https://github.com/galipremsagar)
+- Fix Parquet support for seconds and milliseconds duration types ([#11854](https://github.com/rapidsai/cudf/pull/11854)) [@vuule](https://github.com/vuule)
+- Ensure better compiler cache results between cudf cal-ver branches ([#11835](https://github.com/rapidsai/cudf/pull/11835)) [@robertmaynard](https://github.com/robertmaynard)
+- Fix make_column_from_scalar for all-null strings column ([#11807](https://github.com/rapidsai/cudf/pull/11807)) [@davidwendt](https://github.com/davidwendt)
+- Tell jitify_preprocess where to search for libnvrtc ([#11787](https://github.com/rapidsai/cudf/pull/11787)) [@robertmaynard](https://github.com/robertmaynard)
+- add V2 page header support to parquet reader ([#11778](https://github.com/rapidsai/cudf/pull/11778)) [@etseidl](https://github.com/etseidl)
+- Parquet reader: bug fix for a num_rows/skip_rows corner case, w/optimization for nested preprocessing ([#11752](https://github.com/rapidsai/cudf/pull/11752)) [@nvdbaranec](https://github.com/nvdbaranec)
+- Determine if Arrow has S3 support at runtime in unit test. ([#11560](https://github.com/rapidsai/cudf/pull/11560)) [@bdice](https://github.com/bdice)
+
+## 📖 Documentation
+
+- Use rapidsai CODE_OF_CONDUCT.md ([#12166](https://github.com/rapidsai/cudf/pull/12166)) [@bdice](https://github.com/bdice)
+- Add symlinks to notebooks. ([#12128](https://github.com/rapidsai/cudf/pull/12128)) [@bdice](https://github.com/bdice)
+- Add `truncate` API to python doc pages ([#12109](https://github.com/rapidsai/cudf/pull/12109)) [@galipremsagar](https://github.com/galipremsagar)
+- Update Numba docs links. ([#12107](https://github.com/rapidsai/cudf/pull/12107)) [@bdice](https://github.com/bdice)
+- Remove &quot;Multi-GPU with Dask-cuDF&quot; notebook. ([#12095](https://github.com/rapidsai/cudf/pull/12095)) [@bdice](https://github.com/bdice)
+- Fix link to c++ developer guide from `CONTRIBUTING.md` ([#12084](https://github.com/rapidsai/cudf/pull/12084)) [@brandon-b-miller](https://github.com/brandon-b-miller)
+- Add pivot_table and crosstab to docs. ([#12014](https://github.com/rapidsai/cudf/pull/12014)) [@bdice](https://github.com/bdice)
+- Fix doxygen text for cudf::dictionary::encode ([#11991](https://github.com/rapidsai/cudf/pull/11991)) [@davidwendt](https://github.com/davidwendt)
+- Replace default_stream_value with get_default_stream in docs. ([#11985](https://github.com/rapidsai/cudf/pull/11985)) [@vyasr](https://github.com/vyasr)
+- Add dtype docs pages and docstrings for `cudf` specific dtypes ([#11974](https://github.com/rapidsai/cudf/pull/11974)) [@galipremsagar](https://github.com/galipremsagar)
+- Update Unit Testing in libcudf guidelines to code tests outside the cudf::test namespace ([#11959](https://github.com/rapidsai/cudf/pull/11959)) [@davidwendt](https://github.com/davidwendt)
+- Rename libcudf++ to libcudf. ([#11953](https://github.com/rapidsai/cudf/pull/11953)) [@bdice](https://github.com/bdice)
+- Fix documentation referring to removed as_gpu_matrix method. ([#11937](https://github.com/rapidsai/cudf/pull/11937)) [@bdice](https://github.com/bdice)
+- Remove &quot;experimental&quot; warning for struct columns in ORC reader and writer ([#11880](https://github.com/rapidsai/cudf/pull/11880)) [@vuule](https://github.com/vuule)
+- Initial draft of policies and guidelines for libcudf usage. ([#11853](https://github.com/rapidsai/cudf/pull/11853)) [@vyasr](https://github.com/vyasr)
+- Add clear indication of non-GPU accelerated parameters in read_json docstring ([#11825](https://github.com/rapidsai/cudf/pull/11825)) [@GregoryKimball](https://github.com/GregoryKimball)
+- Add developer docs for writing tests ([#11199](https://github.com/rapidsai/cudf/pull/11199)) [@vyasr](https://github.com/vyasr)
+
+## 🚀 New Features
+
+- Adds an EventHandler to Java MemoryBuffer to be invoked on close ([#12125](https://github.com/rapidsai/cudf/pull/12125)) [@abellina](https://github.com/abellina)
+- Support `+` in `strings_udf` ([#12117](https://github.com/rapidsai/cudf/pull/12117)) [@brandon-b-miller](https://github.com/brandon-b-miller)
+- Support `upper` and `lower` in `strings_udf` ([#12099](https://github.com/rapidsai/cudf/pull/12099)) [@brandon-b-miller](https://github.com/brandon-b-miller)
+- Add wheel builds ([#12096](https://github.com/rapidsai/cudf/pull/12096)) [@vyasr](https://github.com/vyasr)
+- Allow setting malloc heap size in string udfs ([#12094](https://github.com/rapidsai/cudf/pull/12094)) [@brandon-b-miller](https://github.com/brandon-b-miller)
+- Support `strip`, `lstrip`, and `rstrip` in `strings_udf` ([#12091](https://github.com/rapidsai/cudf/pull/12091)) [@brandon-b-miller](https://github.com/brandon-b-miller)
+- Mark nvcomp zstd compression stable ([#12059](https://github.com/rapidsai/cudf/pull/12059)) [@jbrennan333](https://github.com/jbrennan333)
+- Add debug-only onAllocated/onDeallocated to RmmEventHandler ([#12054](https://github.com/rapidsai/cudf/pull/12054)) [@abellina](https://github.com/abellina)
+- Enable building against the libarrow contained in pyarrow ([#12034](https://github.com/rapidsai/cudf/pull/12034)) [@vyasr](https://github.com/vyasr)
+- Add strings `like` jni and native method ([#12032](https://github.com/rapidsai/cudf/pull/12032)) [@cindyyuanjiang](https://github.com/cindyyuanjiang)
+- Cleanup common parsing code in JSON, CSV reader ([#12022](https://github.com/rapidsai/cudf/pull/12022)) [@karthikeyann](https://github.com/karthikeyann)
+- byte_range support for JSON Lines format ([#12017](https://github.com/rapidsai/cudf/pull/12017)) [@karthikeyann](https://github.com/karthikeyann)
+- Minor cleanup of root CMakeLists.txt for better organization ([#11988](https://github.com/rapidsai/cudf/pull/11988)) [@robertmaynard](https://github.com/robertmaynard)
+- Add inplace arithmetic operators to `MaskedType` ([#11987](https://github.com/rapidsai/cudf/pull/11987)) [@brandon-b-miller](https://github.com/brandon-b-miller)
+- Implement JNI for chunked Parquet reader ([#11961](https://github.com/rapidsai/cudf/pull/11961)) [@ttnghia](https://github.com/ttnghia)
+- Add method argument to DataFrame.quantile ([#11957](https://github.com/rapidsai/cudf/pull/11957)) [@rjzamora](https://github.com/rjzamora)
+- Add gpu memory watermark apis to JNI ([#11950](https://github.com/rapidsai/cudf/pull/11950)) [@abellina](https://github.com/abellina)
+- Adds retryCount to RmmEventHandler.onAllocFailure ([#11940](https://github.com/rapidsai/cudf/pull/11940)) [@abellina](https://github.com/abellina)
+- Enable returning string data from UDFs used through `apply` ([#11933](https://github.com/rapidsai/cudf/pull/11933)) [@brandon-b-miller](https://github.com/brandon-b-miller)
+- Switch over to rapids-cmake patches for thrust ([#11921](https://github.com/rapidsai/cudf/pull/11921)) [@robertmaynard](https://github.com/robertmaynard)
+- Add strings udf C++ classes and functions for phase II ([#11912](https://github.com/rapidsai/cudf/pull/11912)) [@davidwendt](https://github.com/davidwendt)
+- Trim quotes for non-string values in nested json parsing ([#11898](https://github.com/rapidsai/cudf/pull/11898)) [@karthikeyann](https://github.com/karthikeyann)
+- Enable CEC for `strings_udf` ([#11884](https://github.com/rapidsai/cudf/pull/11884)) [@brandon-b-miller](https://github.com/brandon-b-miller)
+- ArrowIPCTableWriter writes an empty batch in the case of an empty table. ([#11883](https://github.com/rapidsai/cudf/pull/11883)) [@firestarman](https://github.com/firestarman)
+- Implement chunked Parquet reader ([#11867](https://github.com/rapidsai/cudf/pull/11867)) [@ttnghia](https://github.com/ttnghia)
+- Add `read_orc_metadata` to libcudf ([#11815](https://github.com/rapidsai/cudf/pull/11815)) [@vuule](https://github.com/vuule)
+- Support nested types as groupby keys in libcudf ([#11792](https://github.com/rapidsai/cudf/pull/11792)) [@PointKernel](https://github.com/PointKernel)
+- Adding feature Truncate to DataFrame and Series ([#11435](https://github.com/rapidsai/cudf/pull/11435)) [@VamsiTallam95](https://github.com/VamsiTallam95)
+
+## 🛠️ Improvements
+
+- Reduce number of tests marked `spilling` ([#12197](https://github.com/rapidsai/cudf/pull/12197)) [@madsbk](https://github.com/madsbk)
+- Pin `dask` and `distributed` for release ([#12165](https://github.com/rapidsai/cudf/pull/12165)) [@galipremsagar](https://github.com/galipremsagar)
+- Don&#39;t rely on GNU find in headers_test.sh ([#12164](https://github.com/rapidsai/cudf/pull/12164)) [@wence-](https://github.com/wence-)
+- Update cp.clip call ([#12148](https://github.com/rapidsai/cudf/pull/12148)) [@quasiben](https://github.com/quasiben)
+- Enable automatic column projection in groupby().agg ([#12124](https://github.com/rapidsai/cudf/pull/12124)) [@rjzamora](https://github.com/rjzamora)
+- Refactor `purge_nonempty_nulls` ([#12111](https://github.com/rapidsai/cudf/pull/12111)) [@ttnghia](https://github.com/ttnghia)
+- Create an `int8` column in `read_csv` when all elements are missing ([#12110](https://github.com/rapidsai/cudf/pull/12110)) [@vuule](https://github.com/vuule)
+- Spilling to host memory ([#12106](https://github.com/rapidsai/cudf/pull/12106)) [@madsbk](https://github.com/madsbk)
+- First pass of `pd.read_orc` changes in tests ([#12103](https://github.com/rapidsai/cudf/pull/12103)) [@galipremsagar](https://github.com/galipremsagar)
+- Expose engine argument in dask_cudf.read_json ([#12101](https://github.com/rapidsai/cudf/pull/12101)) [@rjzamora](https://github.com/rjzamora)
+- Remove CUDA 10 compatibility code. ([#12088](https://github.com/rapidsai/cudf/pull/12088)) [@bdice](https://github.com/bdice)
+- Move and update `dask` nightly install in CI ([#12082](https://github.com/rapidsai/cudf/pull/12082)) [@galipremsagar](https://github.com/galipremsagar)
+- Throw an error when libcudf is built without cuFile and `LIBCUDF_CUFILE_POLICY` is set to `"ALWAYS"` ([#12080](https://github.com/rapidsai/cudf/pull/12080)) [@vuule](https://github.com/vuule)
+- Remove macros that inspect the contents of exceptions ([#12076](https://github.com/rapidsai/cudf/pull/12076)) [@vyasr](https://github.com/vyasr)
+- Fix ingest_raw_data performance issue in Nested JSON reader due to RVO ([#12070](https://github.com/rapidsai/cudf/pull/12070)) [@karthikeyann](https://github.com/karthikeyann)
+- Remove overflow error during decimal binops ([#12063](https://github.com/rapidsai/cudf/pull/12063)) [@galipremsagar](https://github.com/galipremsagar)
+- Change cudf::detail::tdigest to cudf::tdigest::detail ([#12050](https://github.com/rapidsai/cudf/pull/12050)) [@davidwendt](https://github.com/davidwendt)
+- Fix quantile gtests coded in namespace cudf::test ([#12049](https://github.com/rapidsai/cudf/pull/12049)) [@davidwendt](https://github.com/davidwendt)
+- Add support for `DataFrame.from_dict`/`to_dict` and `Series.to_dict` ([#12048](https://github.com/rapidsai/cudf/pull/12048)) [@galipremsagar](https://github.com/galipremsagar)
+- Refactor Parquet reader ([#12046](https://github.com/rapidsai/cudf/pull/12046)) [@ttnghia](https://github.com/ttnghia)
+- Forward merge 22.10 into 22.12 ([#12045](https://github.com/rapidsai/cudf/pull/12045)) [@vyasr](https://github.com/vyasr)
+- Standardize newlines at ends of files. ([#12042](https://github.com/rapidsai/cudf/pull/12042)) [@bdice](https://github.com/bdice)
+- Trim trailing whitespace from all files. ([#12041](https://github.com/rapidsai/cudf/pull/12041)) [@bdice](https://github.com/bdice)
+- Use nosync policy in gather and scatter implementations. ([#12038](https://github.com/rapidsai/cudf/pull/12038)) [@bdice](https://github.com/bdice)
+- Remove smart quotes from all docstrings. ([#12035](https://github.com/rapidsai/cudf/pull/12035)) [@bdice](https://github.com/bdice)
+- Update cuda-python dependency to 11.7.1 ([#12030](https://github.com/rapidsai/cudf/pull/12030)) [@galipremsagar](https://github.com/galipremsagar)
+- Add cython-lint to pre-commit checks. ([#12020](https://github.com/rapidsai/cudf/pull/12020)) [@bdice](https://github.com/bdice)
+- Use pragma once ([#12019](https://github.com/rapidsai/cudf/pull/12019)) [@bdice](https://github.com/bdice)
+- New GHA to add issues/prs to project board ([#12016](https://github.com/rapidsai/cudf/pull/12016)) [@jarmak-nv](https://github.com/jarmak-nv)
+- Add DataFrame.pivot_table. ([#12015](https://github.com/rapidsai/cudf/pull/12015)) [@bdice](https://github.com/bdice)
+- Rollback of `DeviceBufferLike` ([#12009](https://github.com/rapidsai/cudf/pull/12009)) [@madsbk](https://github.com/madsbk)
+- Remove default parameters for nvtext::detail functions ([#12007](https://github.com/rapidsai/cudf/pull/12007)) [@davidwendt](https://github.com/davidwendt)
+- Remove default parameters for cudf::dictionary::detail functions ([#12006](https://github.com/rapidsai/cudf/pull/12006)) [@davidwendt](https://github.com/davidwendt)
+- Remove unused `managed_allocator` ([#12005](https://github.com/rapidsai/cudf/pull/12005)) [@vyasr](https://github.com/vyasr)
+- Remove default parameters for cudf::strings::detail functions ([#12003](https://github.com/rapidsai/cudf/pull/12003)) [@davidwendt](https://github.com/davidwendt)
+- Remove unnecessary code from dask-cudf _Frame ([#12001](https://github.com/rapidsai/cudf/pull/12001)) [@rjzamora](https://github.com/rjzamora)
+- Ignore python docs build artifacts ([#12000](https://github.com/rapidsai/cudf/pull/12000)) [@galipremsagar](https://github.com/galipremsagar)
+- Use rapids-cmake for google benchmark. ([#11997](https://github.com/rapidsai/cudf/pull/11997)) [@vyasr](https://github.com/vyasr)
+- Leverage rapids_cython for more automated RPATH handling ([#11996](https://github.com/rapidsai/cudf/pull/11996)) [@vyasr](https://github.com/vyasr)
+- Remove stale labeler ([#11995](https://github.com/rapidsai/cudf/pull/11995)) [@raydouglass](https://github.com/raydouglass)
+- Move protobuf compilation to CMake ([#11986](https://github.com/rapidsai/cudf/pull/11986)) [@vyasr](https://github.com/vyasr)
+- Replace most of preprocessor usage in nvcomp adapter with `constexpr` ([#11980](https://github.com/rapidsai/cudf/pull/11980)) [@vuule](https://github.com/vuule)
+- Add missing noexcepts to column_in_metadata methods ([#11973](https://github.com/rapidsai/cudf/pull/11973)) [@vyasr](https://github.com/vyasr)
+- Pass column names to `write_csv` instead of `table_metadata` pointer ([#11972](https://github.com/rapidsai/cudf/pull/11972)) [@vuule](https://github.com/vuule)
+- Accelerate libcudf segmented sort with CUB segmented sort ([#11969](https://github.com/rapidsai/cudf/pull/11969)) [@davidwendt](https://github.com/davidwendt)
+- Feature/remove default streams ([#11967](https://github.com/rapidsai/cudf/pull/11967)) [@vyasr](https://github.com/vyasr)
+- Add pool memory resource to libcudf basic example ([#11966](https://github.com/rapidsai/cudf/pull/11966)) [@davidwendt](https://github.com/davidwendt)
+- Fix some libcudf calls to cudf::detail::gather ([#11963](https://github.com/rapidsai/cudf/pull/11963)) [@davidwendt](https://github.com/davidwendt)
+- Accept const refs instead of const unique_ptr refs in reduce and scan APIs. ([#11960](https://github.com/rapidsai/cudf/pull/11960)) [@vyasr](https://github.com/vyasr)
+- Add deprecation warning for set_allocator. ([#11958](https://github.com/rapidsai/cudf/pull/11958)) [@vyasr](https://github.com/vyasr)
+- Fix lists and structs gtests coded in namespace cudf::test ([#11956](https://github.com/rapidsai/cudf/pull/11956)) [@davidwendt](https://github.com/davidwendt)
+- Add full page indexes to Parquet writer benchmarks ([#11955](https://github.com/rapidsai/cudf/pull/11955)) [@etseidl](https://github.com/etseidl)
+- Use gather-based strings factory in cudf::strings::strip ([#11954](https://github.com/rapidsai/cudf/pull/11954)) [@davidwendt](https://github.com/davidwendt)
+- Default to equal NaNs in make_merge_sets_aggregation. ([#11952](https://github.com/rapidsai/cudf/pull/11952)) [@bdice](https://github.com/bdice)
+- Add `strip_delimiters` option to `read_text` ([#11946](https://github.com/rapidsai/cudf/pull/11946)) [@upsj](https://github.com/upsj)
+- Refactor multibyte_split `output_builder` ([#11945](https://github.com/rapidsai/cudf/pull/11945)) [@upsj](https://github.com/upsj)
+- Remove validation that requires introspection ([#11938](https://github.com/rapidsai/cudf/pull/11938)) [@vyasr](https://github.com/vyasr)
+- Add `.str.find_multiple` API ([#11928](https://github.com/rapidsai/cudf/pull/11928)) [@galipremsagar](https://github.com/galipremsagar)
+- Add regex_program class for use with all regex APIs ([#11927](https://github.com/rapidsai/cudf/pull/11927)) [@davidwendt](https://github.com/davidwendt)
+- Enable backend dispatching for Dask-DataFrame creation ([#11920](https://github.com/rapidsai/cudf/pull/11920)) [@rjzamora](https://github.com/rjzamora)
+- Performance improvement in JSON Tree traversal ([#11919](https://github.com/rapidsai/cudf/pull/11919)) [@karthikeyann](https://github.com/karthikeyann)
+- Fix some gtests incorrectly coded in namespace cudf::test (part I) ([#11917](https://github.com/rapidsai/cudf/pull/11917)) [@davidwendt](https://github.com/davidwendt)
+- Refactor pad/zfill functions for reuse with strings udf ([#11914](https://github.com/rapidsai/cudf/pull/11914)) [@davidwendt](https://github.com/davidwendt)
+- Add `nanosecond` &amp; `microsecond` to `DatetimeProperties` ([#11911](https://github.com/rapidsai/cudf/pull/11911)) [@galipremsagar](https://github.com/galipremsagar)
+- Pin mimesis version in setup.py. ([#11906](https://github.com/rapidsai/cudf/pull/11906)) [@bdice](https://github.com/bdice)
+- Error on `ListColumn` or any new unsupported column in `cudf.Index` ([#11902](https://github.com/rapidsai/cudf/pull/11902)) [@galipremsagar](https://github.com/galipremsagar)
+- Add thrust output iterator fix (1805) to thrust.patch ([#11900](https://github.com/rapidsai/cudf/pull/11900)) [@davidwendt](https://github.com/davidwendt)
+- Relax `codecov` threshold diff ([#11899](https://github.com/rapidsai/cudf/pull/11899)) [@galipremsagar](https://github.com/galipremsagar)
+- Use public APIs in STREAM_COMPACTION_NVBENCH ([#11892](https://github.com/rapidsai/cudf/pull/11892)) [@GregoryKimball](https://github.com/GregoryKimball)
+- Add coverage for string UDF tests. ([#11891](https://github.com/rapidsai/cudf/pull/11891)) [@vyasr](https://github.com/vyasr)
+- Provide `data_chunk_source` wrapper for `datasource` ([#11886](https://github.com/rapidsai/cudf/pull/11886)) [@upsj](https://github.com/upsj)
+- Handle `multibyte_split` byte_range out-of-bounds offsets on host ([#11885](https://github.com/rapidsai/cudf/pull/11885)) [@upsj](https://github.com/upsj)
+- Add tests ensuring that cudf&#39;s default stream is always used ([#11875](https://github.com/rapidsai/cudf/pull/11875)) [@vyasr](https://github.com/vyasr)
+- Change expect_strings_empty into expect_column_empty libcudf test utility ([#11873](https://github.com/rapidsai/cudf/pull/11873)) [@davidwendt](https://github.com/davidwendt)
+- Add ngroup ([#11871](https://github.com/rapidsai/cudf/pull/11871)) [@shwina](https://github.com/shwina)
+- Reduce memory usage in nested JSON parser - tree generation ([#11864](https://github.com/rapidsai/cudf/pull/11864)) [@karthikeyann](https://github.com/karthikeyann)
+- Unpin `dask` and `distributed` for development ([#11859](https://github.com/rapidsai/cudf/pull/11859)) [@galipremsagar](https://github.com/galipremsagar)
+- Remove unused includes for table/row_operators ([#11857](https://github.com/rapidsai/cudf/pull/11857)) [@GregoryKimball](https://github.com/GregoryKimball)
+- Use conda-forge&#39;s `pyorc` ([#11855](https://github.com/rapidsai/cudf/pull/11855)) [@jakirkham](https://github.com/jakirkham)
+- Add libcudf strings examples ([#11849](https://github.com/rapidsai/cudf/pull/11849)) [@davidwendt](https://github.com/davidwendt)
+- Remove `cudf_io` namespace alias ([#11827](https://github.com/rapidsai/cudf/pull/11827)) [@vuule](https://github.com/vuule)
+- Test/remove thrust vector usage ([#11813](https://github.com/rapidsai/cudf/pull/11813)) [@vyasr](https://github.com/vyasr)
+- Add BGZIP reader to python `read_text` ([#11802](https://github.com/rapidsai/cudf/pull/11802)) [@upsj](https://github.com/upsj)
+- Merge branch-22.10 into branch-22.12 ([#11801](https://github.com/rapidsai/cudf/pull/11801)) [@davidwendt](https://github.com/davidwendt)
+- Fix compile warning from CUDF_FUNC_RANGE in a member function ([#11798](https://github.com/rapidsai/cudf/pull/11798)) [@davidwendt](https://github.com/davidwendt)
+- Update cudf JNI version to 22.12.0-SNAPSHOT ([#11764](https://github.com/rapidsai/cudf/pull/11764)) [@pxLi](https://github.com/pxLi)
+- Update flake8 to 5.0.4 and use flake8-force to check Cython. ([#11736](https://github.com/rapidsai/cudf/pull/11736)) [@bdice](https://github.com/bdice)
+- Add BGZIP multibyte_split benchmark ([#11723](https://github.com/rapidsai/cudf/pull/11723)) [@upsj](https://github.com/upsj)
+- Bifurcate Dependency Lists ([#11674](https://github.com/rapidsai/cudf/pull/11674)) [@bdice](https://github.com/bdice)
+- Default to equal NaNs in make_collect_set_aggregation. ([#11621](https://github.com/rapidsai/cudf/pull/11621)) [@bdice](https://github.com/bdice)
+- Conform &quot;bench_isin&quot; to match generator column names ([#11549](https://github.com/rapidsai/cudf/pull/11549)) [@GregoryKimball](https://github.com/GregoryKimball)
+- Removing int8 column option from parquet byte_array writing ([#11539](https://github.com/rapidsai/cudf/pull/11539)) [@hyperbolic2346](https://github.com/hyperbolic2346)
+- Add checks for HLG layers in dask-cudf groupby tests ([#10853](https://github.com/rapidsai/cudf/pull/10853)) [@charlesbluca](https://github.com/charlesbluca)
+- part1: Simplify BaseIndex to an abstract class ([#10389](https://github.com/rapidsai/cudf/pull/10389)) [@skirui-source](https://github.com/skirui-source)
+- Make all `nvcc` warnings into errors ([#8916](https://github.com/rapidsai/cudf/pull/8916)) [@trxcllnt](https://github.com/trxcllnt)
 
 # cuDF 22.10.00 (12 Oct 2022)